<a href="https://colab.research.google.com/github/willstokesv/NFLMock/blob/main/DRAFT_PROJECT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **WHO DOES OUR MODEL HAVE EACH TEAM DRAFTING ACCORDING TO THE ANALYSTS?**

---



In [74]:
# %%
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np

# %%
# Initialize DataFrames
jeremiah = pd.DataFrame()
kiper = pd.DataFrame()
bucky = pd.DataFrame()
tannenbaum = pd.DataFrame()
draft_results = pd.DataFrame()

# Load mock drafts (2021–2025) and real draft results (2021–2024)
for year in ['21', '22', '23', '24', '25']:
    year_int = 2000 + int(year)

    # Load mocks
    cur_jeremiah = pd.read_csv(f'jeremiah{year}.csv')
    cur_jeremiah['year'] = year_int
    jeremiah = pd.concat([jeremiah, cur_jeremiah])

    cur_kiper = pd.read_csv(f'kiper{year}.csv')
    cur_kiper['year'] = year_int
    kiper = pd.concat([kiper, cur_kiper])

    cur_bucky = pd.read_csv(f'brooks{year}.csv')
    cur_bucky['year'] = year_int
    bucky = pd.concat([bucky, cur_bucky])

    cur_tannenbaum = pd.read_csv(f'tannenbaum{year}.csv')
    cur_tannenbaum['year'] = year_int
    tannenbaum = pd.concat([tannenbaum, cur_tannenbaum])

    # Load actual draft results only for 2021–2024
    if year != '25':
        cur_draft_results = pd.read_csv(f'nfldraft{year}.csv')
        cur_draft_results['year'] = year_int
        draft_results = pd.concat([draft_results, cur_draft_results])

# Keep relevant columns
draft_results = draft_results[['Pick', 'Tm', 'Player', 'year']]
jeremiah = jeremiah.rename(columns={'Pick': 'Pick_jeremiah'})
kiper = kiper.rename(columns={'Pick': 'Pick_kiper'})
bucky = bucky.rename(columns={'Pick': 'Pick_bucky'})
tannenbaum = tannenbaum.rename(columns={'Pick': 'Pick_tannenbaum'})

# %%
# Merge mock drafts with draft results
weighted_avg = draft_results.copy()
weighted_avg = weighted_avg.merge(jeremiah, on=['year', 'Player'], how='left', suffixes=('', '_jeremiah'))
weighted_avg = weighted_avg.merge(kiper, on=['year', 'Player'], how='left', suffixes=('', '_kiper'))
weighted_avg = weighted_avg.merge(bucky, on=['year', 'Player'], how='left', suffixes=('', '_bucky'))
weighted_avg = weighted_avg.merge(tannenbaum, on=['year', 'Player'], how='left', suffixes=('', '_tannenbaum'))

# Handle missing picks using year-specific fill values
FILL_NA_BY_YEAR = {
    2021: 33,
    2022: 33,
    2023: 33,  # Only 31 picks in 2023, but assume early Day 2 starts at 33
    2024: 33
}
for col in ['Pick_jeremiah', 'Pick_kiper', 'Pick_bucky', 'Pick_tannenbaum']:
    weighted_avg[col] = weighted_avg.apply(
        lambda row: row[col] if pd.notna(row[col]) else FILL_NA_BY_YEAR[row['year']],
        axis=1
    )
# Drop rows with NaN values in the 'Pick' column before fitting the model
weighted_avg.dropna(subset=['Pick'], inplace=True)

# Train linear regression model
X = weighted_avg[['Pick_jeremiah', 'Pick_kiper', 'Pick_bucky', 'Pick_tannenbaum']]
y = weighted_avg['Pick']
reg = LinearRegression().fit(X, y)

# Show learned weights
coefficients = pd.concat([
    pd.DataFrame(X.columns, columns=['Source']),
    pd.DataFrame(reg.coef_, columns=['Weight'])
], axis=1)
print("Learned Weights:")
print(coefficients.to_string(index=False))

# --- Model Performance ---
from sklearn.metrics import mean_squared_error, r2_score
predicted_train = reg.predict(X)
print("\nModel Performance:")
print("R²:", round(r2_score(y, predicted_train), 4))
print("RMSE:", round(np.sqrt(mean_squared_error(y, predicted_train)), 4))

# --- Standardized Model for Fair Weight Comparison ---
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
reg_scaled = LinearRegression().fit(X_scaled, y)

standardized_coefficients = pd.DataFrame({
    'Source': X.columns,
    'Standardized Weight': reg_scaled.coef_
})
print("\nStandardized Analyst Weights (after feature scaling):")
print(standardized_coefficients.to_string(index=False))


# %%
# Predict 2025 draft order
jeremiah_25 = jeremiah[jeremiah['year'] == 2025].drop(columns=['year'])
kiper_25 = kiper[kiper['year'] == 2025].drop(columns=['year'])
bucky_25 = bucky[bucky['year'] == 2025].drop(columns=['year'])
tannenbaum_25 = tannenbaum[tannenbaum['year'] == 2025].drop(columns=['year'])

# Merge all mocks for 2025, specifying suffixes to avoid conflicts
all_mocks_25 = jeremiah_25.merge(kiper_25, on='Player', how='outer', suffixes=('_jeremiah', '_kiper'))
all_mocks_25 = all_mocks_25.merge(bucky_25, on='Player', how='outer', suffixes=('', '_bucky'))
all_mocks_25 = all_mocks_25.merge(tannenbaum_25, on='Player', how='outer', suffixes=('', '_tannenbaum'))

# Fill missing mock data for 2025
FILL_NA_PICK_2025 = 33
for col in ['Pick_jeremiah', 'Pick_kiper', 'Pick_bucky', 'Pick_tannenbaum']:
    all_mocks_25[col] = all_mocks_25[col].fillna(FILL_NA_PICK_2025)

# Predict and rank 2025 prospects
all_mocks_25['predicted_raw'] = reg.predict(
    all_mocks_25[['Pick_jeremiah', 'Pick_kiper', 'Pick_bucky', 'Pick_tannenbaum']]
)
all_mocks_25['predicted'] = all_mocks_25['predicted_raw'].rank()
all_mocks_25.sort_values(by='predicted', inplace=True)

# 🔁 FIX: Assign team names by rounding predicted pick to nearest int and merging
all_mocks_25['predicted_round'] = all_mocks_25['predicted'].round().astype(int)

# Merge with draft order DataFrame to get team abbreviations
all_mocks_25 = all_mocks_25.merge(
    df[['Pick', 'Abbreviation']],
    left_on='predicted_round',
    right_on='Pick',
    how='left'
).drop(columns=['Pick'])

# Assign team and clean up
all_mocks_25['Tm'] = all_mocks_25['Abbreviation']
all_mocks_25.drop(columns=['Abbreviation', 'predicted_round'], inplace=True)

# 🔽 Only keep top 32 predicted picks
top_32 = all_mocks_25[all_mocks_25['predicted'] <= 32].copy()
top_32.sort_values(by='predicted', inplace=True)

# Show results with team abbreviation next to player
print("\n2025 Predicted Draft Order (Top 32 Only):")
print(
    top_32[
        ['Player', 'Tm', 'predicted', 'Pick_jeremiah', 'Pick_kiper', 'Pick_bucky', 'Pick_tannenbaum']
    ].to_string(index=False))


Learned Weights:
         Source   Weight
  Pick_jeremiah 2.342850
     Pick_kiper 1.347919
     Pick_bucky 1.396471
Pick_tannenbaum 1.424453

Model Performance:
R²: 0.2451
RMSE: 65.0571

Standardized Analyst Weights (after feature scaling):
         Source  Standardized Weight
  Pick_jeremiah            14.374058
     Pick_kiper             8.261218
     Pick_bucky             8.237100
Pick_tannenbaum             8.426528

2025 Predicted Draft Order (Top 32 Only):
            Player  Tm  predicted  Pick_jeremiah  Pick_kiper  Pick_bucky  Pick_tannenbaum
          Cam Ward TEN        1.0            2.0         2.0         1.0              1.0
      Abdul Carter CLE        2.0            1.0         1.0         4.0              5.0
     Travis Hunter NYG        3.0            3.0         3.0         3.0              2.0
      Mason Graham  NE        4.0            5.0         5.0         5.0              3.0
     Will Campbell JAX        5.0            4.0         4.0         9.0        

2024 Accuracy

In [69]:
import pandas as pd
import numpy as np
import re
# 🔧 Function to normalize player names (handles Jr, Sr, etc., punctuation, and spacing)
def normalize_name(name):
    if pd.isna(name):
        return ''
    name = name.replace('.', '').replace(',', '').strip()
    name = re.sub(r'\s+', ' ', name)  # Collapse multiple spaces
    # Remove common suffixes at the end of names
    for suffix in [' Jr', ' Sr', ' II', ' III', ' IV']:
        if name.endswith(suffix):
            name = name[:-len(suffix)].strip()
    return name.lower()

# 📥 Load actual 2024 draft results (limit to first round)
draft_2024 = pd.read_csv('nfldraft24.csv')[['Player', 'Tm', 'Pick']]
draft_2024 = draft_2024[draft_2024['Pick'] <= 32]
draft_2024['year'] = 2024
draft_2024['Player_clean'] = draft_2024['Player'].apply(normalize_name)

# 📥 Load final mock drafts for 2024 and normalize names
jeremiah_24 = pd.read_csv('jeremiah24.csv')[['Player', 'Pick']].rename(columns={'Pick': 'Pick_jeremiah'})
jeremiah_24['Player_clean'] = jeremiah_24['Player'].apply(normalize_name)

kiper_24 = pd.read_csv('kiper24.csv')[['Player', 'Pick']].rename(columns={'Pick': 'Pick_kiper'})
kiper_24['Player_clean'] = kiper_24['Player'].apply(normalize_name)

bucky_24 = pd.read_csv('brooks24.csv')[['Player', 'Pick']].rename(columns={'Pick': 'Pick_bucky'})
bucky_24['Player_clean'] = bucky_24['Player'].apply(normalize_name)

tannenbaum_24 = pd.read_csv('tannenbaum24.csv')[['Player', 'Pick']].rename(columns={'Pick': 'Pick_tannenbaum'})
tannenbaum_24['Player_clean'] = tannenbaum_24['Player'].apply(normalize_name)

# 🔗 Merge mock drafts with actual picks using cleaned names
compare_df = draft_2024.copy()
compare_df = compare_df.merge(jeremiah_24[['Player_clean', 'Pick_jeremiah']], on='Player_clean', how='left')
compare_df = compare_df.merge(kiper_24[['Player_clean', 'Pick_kiper']], on='Player_clean', how='left')
compare_df = compare_df.merge(bucky_24[['Player_clean', 'Pick_bucky']], on='Player_clean', how='left')
compare_df = compare_df.merge(tannenbaum_24[['Player_clean', 'Pick_tannenbaum']], on='Player_clean', how='left')

# 📏 Calculate delta between actual pick and mock drafts
for analyst in ['jeremiah', 'kiper', 'bucky', 'tannenbaum']:
    pick_col = f'Pick_{analyst}'
    delta_col = f'Delta_{analyst}'
    compare_df[delta_col] = compare_df.apply(
        lambda row: abs(row['Pick'] - row[pick_col]) if not pd.isna(row[pick_col]) else None,
        axis=1
    )

# 🧹 Clean up
compare_df.drop(columns=['Player_clean'], inplace=True)

# 📊 Display results
compare_df.sort_values(by='Pick', inplace=True)
print("\n📊 2024 FIRST ROUND: Analyst Mock Comparison vs Actual Draft")
print(compare_df[[
    'Tm', 'Player', 'Pick',
    'Pick_jeremiah',
    'Pick_kiper',
    'Pick_bucky',
    'Pick_tannenbaum'
]].to_string(index=False))


📊 2024 FIRST ROUND: Analyst Mock Comparison vs Actual Draft
 Tm              Player  Pick  Pick_jeremiah  Pick_kiper  Pick_bucky  Pick_tannenbaum
CHI      Caleb Williams     1            1.0         1.0         1.0              1.0
WAS      Jayden Daniels     2            2.0         2.0         2.0              3.0
NWE          Drake Maye     3            3.0         3.0        11.0              2.0
ARI Marvin Harrison Jr.     4            4.0         4.0         4.0              9.0
LAC             Joe Alt     5            7.0         7.0         5.0              5.0
NYG        Malik Nabers     6            6.0         6.0         6.0              6.0
TEN           JC Latham     7            5.0        11.0        14.0              7.0
ATL       Michael Penix     8           13.0         NaN         NaN             19.0
CHI         Rome Odunze     9           10.0         9.0         9.0             14.0
MIN       J.J. McCarthy    10           11.0         5.0         3.0           

# **Ranking each analyst by the accuracy of their predictions compared to where the player was actually drafted.**

In [57]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np

# Step 1: Initialize DataFrames
jeremiah = pd.DataFrame()
kiper = pd.DataFrame()
bucky = pd.DataFrame()
tannenbaum = pd.DataFrame()
draft_results = pd.DataFrame()

# Step 2: Load data for 2022–2025 (draft results only 2022–2024)
for year in ['22', '23', '24', '25']:
    year_int = 2000 + int(year)

    # Load mocks
    cur_jeremiah = pd.read_csv(f'jeremiah{year}.csv')
    cur_jeremiah['year'] = year_int
    jeremiah = pd.concat([jeremiah, cur_jeremiah])

    cur_kiper = pd.read_csv(f'kiper{year}.csv')
    cur_kiper['year'] = year_int
    kiper = pd.concat([kiper, cur_kiper])

    cur_bucky = pd.read_csv(f'brooks{year}.csv')
    cur_bucky['year'] = year_int
    bucky = pd.concat([bucky, cur_bucky])

    cur_tannenbaum = pd.read_csv(f'tannenbaum{year}.csv')
    cur_tannenbaum['year'] = year_int
    tannenbaum = pd.concat([tannenbaum, cur_tannenbaum])

    # Draft results only for 2022–2024
    if year != '25':
        cur_draft_results = pd.read_csv(f'nfldraft{year}.csv')
        cur_draft_results['year'] = year_int
        draft_results = pd.concat([draft_results, cur_draft_results])

# Step 3: Clean and merge
draft_results = draft_results[['Pick', 'Tm', 'Player', 'year']]
jeremiah = jeremiah.rename(columns={'Pick': 'Pick_jeremiah'})
kiper = kiper.rename(columns={'Pick': 'Pick_kiper'})
bucky = bucky.rename(columns={'Pick': 'Pick_bucky'})
tannenbaum = tannenbaum.rename(columns={'Pick': 'Pick_tannenbaum'})

weighted_avg = draft_results.copy()
weighted_avg = weighted_avg.merge(jeremiah, on=['year', 'Player'], how='left', suffixes=('', '_jeremiah'))
weighted_avg = weighted_avg.merge(kiper, on=['year', 'Player'], how='left', suffixes=('', '_kiper'))
weighted_avg = weighted_avg.merge(bucky, on=['year', 'Player'], how='left', suffixes=('', '_bucky'))
weighted_avg = weighted_avg.merge(tannenbaum, on=['year', 'Player'], how='left', suffixes=('', '_tannenbaum'))

# Step 4: Fill missing mock picks with estimated value (early 2nd round)
FILL_NA_BY_YEAR = {2022: 33, 2023: 33, 2024: 33}
for col in ['Pick_jeremiah', 'Pick_kiper', 'Pick_bucky', 'Pick_tannenbaum']:
    weighted_avg[col] = weighted_avg.apply(
        lambda row: row[col] if pd.notna(row[col]) else FILL_NA_BY_YEAR[row['year']],
        axis=1
    )

# Step 5: Drop rows with no actual pick
weighted_avg.dropna(subset=['Pick'], inplace=True)

# Step 6: Ranking function
def rank_analysts_all_years(weighted_avg, years):
    analysts = ['jeremiah', 'kiper', 'bucky', 'tannenbaum']
    all_years_results = []

    for year in years:
        year_data = weighted_avg[weighted_avg['year'] == year]
        results = {}

        for analyst in analysts:
            correct_picks = 0
            total_delta = 0

            for _, row in year_data.iterrows():
                actual = row['Pick']
                predicted = row[f'Pick_{analyst}']

                if pd.notna(predicted):
                    delta = abs(actual - predicted)
                    if delta == 0:
                        correct_picks += 1
                    total_delta += delta

            weighted_score = correct_picks * 1000 - total_delta

            results[analyst] = {
                'Year': year,
                'Analyst': analyst.capitalize(),
                'Correct Picks': correct_picks,
                'Total Delta': total_delta,
                'Weighted Score': weighted_score
            }

        # Create DataFrame, rank and append to results
        year_df = pd.DataFrame.from_dict(results, orient='index')
        year_df = year_df.sort_values(by=['Weighted Score', 'Total Delta'], ascending=[False, True])
        year_df['Rank'] = range(1, len(year_df) + 1)
        all_years_results.append(year_df)

    final_df = pd.concat(all_years_results).reset_index(drop=True)
    return final_df

# Step 7: Run for all available years (2022–2024)
ranked_all_years = rank_analysts_all_years(weighted_avg, [2022, 2023, 2024])

# Step 8: Display results year-by-year with spacing
print("\n📊 Year-by-Year Analyst Accuracy Rankings:\n")
for year in [2022, 2023, 2024]:
    print(f"====== Year: {year} ======")
    display_df = ranked_all_years[ranked_all_years['Year'] == year][[
        'Rank', 'Analyst', 'Correct Picks', 'Total Delta', 'Weighted Score'
    ]]
    print(display_df.to_string(index=False))
    print("\n")



📊 Year-by-Year Analyst Accuracy Rankings:

 Rank    Analyst  Correct Picks  Total Delta  Weighted Score
    1      Kiper              7      26498.0        -19498.0
    2   Jeremiah              4      26502.0        -22502.0
    3 Tannenbaum              3      26645.0        -23645.0
    4      Bucky              2      26589.0        -24589.0


 Rank    Analyst  Correct Picks  Total Delta  Weighted Score
    1      Bucky              6      25945.0        -19945.0
    2   Jeremiah              5      25963.0        -20963.0
    3 Tannenbaum              3      26039.0        -23039.0
    4      Kiper              1      25951.0        -24951.0


 Rank    Analyst  Correct Picks  Total Delta  Weighted Score
    1      Kiper              9      25419.0        -16419.0
    2 Tannenbaum              8      25455.0        -17455.0
    3   Jeremiah              7      25380.0        -18380.0
    4      Bucky              7      25461.0        -18461.0




## **Pre vs Post Combine Mocks (Mel Kiper)**



In [48]:
import pandas as pd
import os

def analyze_mock_movements(analyst: str, years):
    """
    Analyze pre- vs post-combine mock draft differences for a given analyst.

    Args:
        analyst (str): Analyst name (e.g., 'kiper').
        years (list): List of years to analyze.

    Returns:
        tuple: (all_changes_df, same_position_df)
    """
    all_changes = []
    all_same_position = []

    for year in years:
        short_year = str(year)[2:]
        pre_file = f"{analyst}{short_year}(pre-combine).csv"
        final_file = f"{analyst}{short_year}.csv"

        if not os.path.exists(pre_file) or not os.path.exists(final_file):
            print(f"⚠️ Missing files for {analyst} {year}. Skipping.")
            continue

        # Load data
        pre_df = pd.read_csv(pre_file)
        final_df = pd.read_csv(final_file)

        # Merge and compute difference
        merged = pd.merge(pre_df, final_df, on="Player", suffixes=("_pre", "_final"), how="outer")
        merged["Pick_Difference"] = abs(merged["Pick_pre"] - merged["Pick_final"])
        merged["Year"] = year
        merged["Analyst"] = analyst.capitalize()

        # Append to respective lists
        all_changes.append(merged)
        same_position = merged[merged["Pick_Difference"] == 0]
        all_same_position.append(same_position)

    # Combine results
    changes_df = pd.concat(all_changes).sort_values(by="Pick_Difference", ascending=False) if all_changes else pd.DataFrame()
    same_df = pd.concat(all_same_position).sort_values(by="Year") if all_same_position else pd.DataFrame()

    return changes_df, same_df


# Define years to check
years_to_check = [2022, 2023, 2024]

# Run the analysis for Kiper
biggest_changes_df, same_position_df = analyze_mock_movements("kiper", years_to_check)

# Show more biggest risers/fallers (change head() to desired number)
print("\n📊 Players with the Biggest Mock Draft Changes (All Years):")
print(biggest_changes_df[['Year', 'Analyst', 'Player', 'Pick_pre', 'Pick_final', 'Pick_Difference']].head(25))

# Show players whose projected pick didn’t change
print("\n✅ Players Who Stayed in the Same Position:")
print(same_position_df[['Year', 'Analyst', 'Player', 'Pick_pre', 'Pick_final']])



📊 Players with the Biggest Mock Draft Changes (All Years):
    Year Analyst              Player  Pick_pre  Pick_final  Pick_Difference
34  2023   Kiper    Will McDonald IV       8.0        31.0             23.0
36  2022   Kiper       Travon Walker      24.0         1.0             23.0
2   2023   Kiper      Bijan Robinson      27.0        10.0             17.0
3   2022   Kiper     Arnold Ebiketie      16.0        30.0             14.0
21  2023   Kiper      Lukas Van Ness      24.0        11.0             13.0
3   2023   Kiper     Broderick Jones      31.0        19.0             12.0
1   2023   Kiper      Anton Harrison      13.0        23.0             10.0
35  2023   Kiper         Zay Flowers      22.0        12.0             10.0
24  2022   Kiper   Kayvon Thibodeaux       2.0        12.0             10.0
14  2024   Kiper         Jared Verse      11.0        21.0             10.0
38  2022   Kiper      Trevor Penning      25.0        16.0              9.0
27  2022   Kiper       Kyle 

# **Pre vs Post Mock Daniel Jeremiah**

In [49]:
def analyze_mock_movements(analyst: str, years):
    """
    Analyze pre- vs post-combine mock draft differences for a given analyst.

    Args:
        analyst (str): Analyst name (e.g., 'jeremiah').
        years (list): List of years to analyze.

    Returns:
        tuple: (all_changes_df, same_position_df)
    """
    all_changes = []
    all_same_position = []

    for year in years:
        short_year = str(year)[2:]
        pre_file = f"{analyst}{short_year}(pre-combine).csv"
        final_file = f"{analyst}{short_year}.csv"

        if not os.path.exists(pre_file) or not os.path.exists(final_file):
            print(f"⚠️ Missing files for {analyst} {year}. Skipping.")
            continue

        # Load data
        pre_df = pd.read_csv(pre_file)
        final_df = pd.read_csv(final_file)

        # Merge and compute difference
        merged = pd.merge(pre_df, final_df, on="Player", suffixes=("_pre", "_final"), how="outer")
        merged["Pick_Difference"] = abs(merged["Pick_pre"] - merged["Pick_final"])
        merged["Year"] = year
        merged["Analyst"] = analyst.capitalize()

        # Append to respective lists
        all_changes.append(merged)
        same_position = merged[merged["Pick_Difference"] == 0]
        all_same_position.append(same_position)

    # Combine results
    changes_df = pd.concat(all_changes).sort_values(by="Pick_Difference", ascending=False) if all_changes else pd.DataFrame()
    same_df = pd.concat(all_same_position).sort_values(by="Year") if all_same_position else pd.DataFrame()

    return changes_df, same_df


# Define years to check
years_to_check = [2022, 2023, 2024]

# Run the analysis for Daniel Jeremiah
biggest_changes_df, same_position_df = analyze_mock_movements("jeremiah", years_to_check)

# Show top risers/fallers
print("\n📊 Players with the Biggest Mock Draft Changes (All Years) — Daniel Jeremiah:")
print(biggest_changes_df[['Year', 'Analyst', 'Player', 'Pick_pre', 'Pick_final', 'Pick_Difference']].head(25))

# Show players whose pick stayed the same
print("\n✅ Players Who Stayed in the Same Position — Daniel Jeremiah:")
print(same_position_df[['Year', 'Analyst', 'Player', 'Pick_pre', 'Pick_final']])


📊 Players with the Biggest Mock Draft Changes (All Years) — Daniel Jeremiah:
    Year   Analyst              Player  Pick_pre  Pick_final  Pick_Difference
27  2023  Jeremiah        Myles Murphy       6.0        29.0             23.0
28  2023  Jeremiah         Nolan Smith      31.0        10.0             21.0
19  2022  Jeremiah    Jameson Williams      27.0         8.0             19.0
21  2022  Jeremiah        Jordan Davis      32.0        14.0             18.0
36  2023  Jeremiah         Zay Flowers      29.0        11.0             18.0
4   2022  Jeremiah       Charles Cross      22.0         6.0             16.0
6   2022  Jeremiah         David Ojabo      15.0        31.0             16.0
26  2024  Jeremiah        Nate Wiggins      15.0        30.0             15.0
17  2024  Jeremiah         Jared Verse      12.0        26.0             14.0
10  2023  Jeremiah      Darnell Wright      17.0        31.0             14.0
30  2022  Jeremiah        Malik Willis      20.0        32.0    

## **Pre vs Post Mock Bucky Brooks**

In [59]:
def analyze_mock_movements(analyst: str, years):
    """
    Analyze pre- vs post-combine mock draft differences for a given analyst.

    Args:
        analyst (str): Analyst name (e.g., 'brooks').
        years (list): List of years to analyze.

    Returns:
        tuple: (all_changes_df, same_position_df)
    """
    all_changes = []
    all_same_position = []

    for year in years:
        short_year = str(year)[2:]
        pre_file = f"{analyst}{short_year}(pre-combine).csv"
        final_file = f"{analyst}{short_year}.csv"

        if not os.path.exists(pre_file) or not os.path.exists(final_file):
            print(f"⚠️ Missing files for {analyst} {year}. Skipping.")
            continue

        # Load data
        pre_df = pd.read_csv(pre_file)
        final_df = pd.read_csv(final_file)

        # Merge and compute difference
        merged = pd.merge(pre_df, final_df, on="Player", suffixes=("_pre", "_final"), how="outer")
        merged["Pick_Difference"] = abs(merged["Pick_pre"] - merged["Pick_final"])
        merged["Year"] = year
        merged["Analyst"] = analyst.capitalize()

        # Append to respective lists
        all_changes.append(merged)
        same_position = merged[merged["Pick_Difference"] == 0]
        all_same_position.append(same_position)

    # Combine results
    changes_df = pd.concat(all_changes).sort_values(by="Pick_Difference", ascending=False) if all_changes else pd.DataFrame()
    same_df = pd.concat(all_same_position).sort_values(by="Year") if all_same_position else pd.DataFrame()

    return changes_df, same_df

# Define years to check
years_to_check = [2022, 2023, 2024]

# Run the analysis for Bucky Brooks
biggest_changes_df, same_position_df = analyze_mock_movements("brooks", years_to_check)

# Show top risers/fallers
print("\n📊 Players with the Biggest Mock Draft Changes (All Years) — Bucky Brooks:")
print(biggest_changes_df[['Year', 'Analyst', 'Player', 'Pick_pre', 'Pick_final', 'Pick_Difference']].head(25))

# Show players whose pick stayed the same
print("\n✅ Players Who Stayed in the Same Position — Bucky Brooks:")
print(same_position_df[['Year', 'Analyst', 'Player', 'Pick_pre', 'Pick_final']])



📊 Players with the Biggest Mock Draft Changes (All Years) — Bucky Brooks:
    Year Analyst              Player  Pick_pre  Pick_final  Pick_Difference
35  2022  Brooks       Travon Walker      32.0         4.0             28.0
24  2023  Brooks      Lukas Van Ness      30.0         8.0             22.0
28  2023  Brooks    O'Cyrus Torrence       9.0        27.0             18.0
5   2023  Brooks        Bryan Bresee      13.0        29.0             16.0
29  2024  Brooks        Nate Wiggins      17.0        31.0             14.0
21  2022  Brooks    Jameson Williams      25.0        12.0             13.0
12  2022  Brooks         Devin Lloyd       9.0        21.0             12.0
13  2023  Brooks   Devon Witherspoon      18.0         7.0             11.0
17  2022  Brooks    George Karlaftis      12.0        23.0             11.0
25  2023  Brooks       Michael Mayer      15.0        26.0             11.0
9   2023  Brooks  Christian Gonzalez      16.0         5.0             11.0
10  2023  Bro

In [62]:
import pandas as pd

# Assuming you already have this DataFrame from earlier analysis:
# ranked_all_years = rank_analysts_all_years(weighted_avg, [2022, 2023, 2024])

# Step 1: Aggregate average metrics per analyst
summary = ranked_all_years.groupby("Analyst").agg({
    "Correct Picks": "mean",
    "Total Delta": "mean",
    "Weighted Score": "mean"
}).reset_index()

# Step 2: Calculate "Reliability" as inverse of average delta
summary["Reliability"] = 1 / summary["Total Delta"]

# Step 3: Normalize reliability to get prediction weights
summary["Weight"] = summary["Reliability"] / summary["Reliability"].sum()

# Optional: Round values for clarity
summary["Correct Picks"] = summary["Correct Picks"].round(2)
summary["Total Delta"] = summary["Total Delta"].round(2)
summary["Weighted Score"] = summary["Weighted Score"].round(2)
summary["Reliability"] = summary["Reliability"].round(6)
summary["Weight"] = summary["Weight"].round(4)

# Step 4: Display or export
print("\n📊 Analyst Accuracy Summary (2022–2024):")
print(summary.to_string(index=False))

# Optional: Save to CSV
# summary.to_csv("analyst_accuracy_summary.csv", index=False)



📊 Analyst Accuracy Summary (2022–2024):
   Analyst  Correct Picks  Total Delta  Weighted Score  Reliability  Weight
     Bucky           5.00     25998.33       -20998.33     0.000038  0.2499
  Jeremiah           5.33     25948.33       -20615.00     0.000039  0.2504
     Kiper           5.67     25956.00       -20289.33     0.000039  0.2503
Tannenbaum           4.67     26046.33       -21379.67     0.000038  0.2494


📊 Column Breakdown
Column	Meaning
Analyst	The name of the draft analyst (e.g., Kiper, Jeremiah, etc.).
Correct Picks	Number of players each analyst correctly matched to the exact pick number (i.e., mock = actual).
Total Delta	The sum of absolute errors between each analyst's mock pick and the actual pick — lower is better.
Weighted Score	A customized metric (likely combining correct picks, delta, etc.) — in this case, more negative is worse.
Reliability	A scaled/normalized metric showing consistency or inverse variance — higher is better.
Weight	The weight assigned to each analyst in your linear regression model for predicting future picks — this is based on the historical performance (probably derived from reliability or error-based loss).
🔍 What It Tells Us
Correct Picks:

Kiper (5.67) slightly edges out the rest in exact matches.

Tannenbaum is lowest here (4.67), meaning fewer perfect hits.

Total Delta:

Jeremiah has the lowest delta (25,948.33) — meaning his mocks were closest overall to the actual draft.

Tannenbaum again has the highest delta, suggesting his mocks were furthest off on average.

Weighted Score:

Kiper scores best (least negative), suggesting his mocks had the most value according to your custom metric.

Tannenbaum again fares the worst.

Reliability & Weight:

All analysts have very similar reliability and weights (~0.25) — that’s expected if you’re doing a linear regression with standardized features and no regularization. The model sees them as nearly equally useful predictors.

Jeremiah barely edges out the others in model weight (0.2504), suggesting he was the most useful to the model, albeit by a tiny margin.

🧠 TL;DR
Kiper had the best performance in terms of correct picks and weighted score.

Jeremiah had the lowest total error, making him most consistent.

Tannenbaum was the weakest performer in all metrics.

The model finds all four analysts similarly valuable, which is reflected in the nearly equal weights.

# **Availability at Pick X**

In [76]:
from scipy.stats import norm

# Estimate RMSE from training data
rmse = np.sqrt(mean_squared_error(y, reg.predict(X)))
print(f"\nEstimated RMSE from training data: {round(rmse, 2)} picks")

# Function to compute availability probability
def probability_available_at_pick(predicted_pick, target_pick, error_std=rmse):
    # Probability player is available at *or after* target_pick
    return norm.cdf(target_pick, loc=predicted_pick, scale=error_std)

# 🔁 MAIN FUNCTION: Input a pick number and get availability probabilities
def check_availability_at_pick(pick_num=18):
    top_32['Prob_available'] = top_32['predicted_raw'].apply(
        lambda x: probability_available_at_pick(x, pick_num)
    )
    top_32_sorted = top_32.sort_values(by='Prob_available', ascending=False)

    print(f"\n📍 Probability Each Player is Available at Pick {pick_num}:")
    print(top_32_sorted[
        ['Player', 'Tm', 'predicted', 'Prob_available']
    ].to_string(index=False))

# 🧪 Example: Check for pick 18
check_availability_at_pick(18)



Estimated RMSE from training data: 65.06 picks

📍 Probability Each Player is Available at Pick 18:
            Player  Tm  predicted  Prob_available
          Cam Ward TEN        1.0        0.892134
      Abdul Carter CLE        2.0        0.873421
     Travis Hunter NYG        3.0        0.867874
      Mason Graham  NE        4.0        0.825906
     Will Campbell JAX        5.0        0.794291
   Shedeur Sanders  LV        6.0        0.779880
      Jalon Walker NYJ        7.0        0.723759
      Tyler Warren CAR        8.0        0.691262
     Armand Membou  NO        9.0        0.608763
     Ashton Jeanty CHI       10.0        0.600270
      Will Johnson  SF       11.0        0.514433
      Tyler Booker DAL       12.0        0.443954
        Mike Green MIA       13.0        0.439092
    Mykel Williams IND       14.0        0.391947
   Omarion Hampton ATL       15.0        0.382593
      Josh Simmons ARI       16.0        0.322054
 Tetairoa McMillan CIN       17.0        0.285659


In [83]:
from scipy.stats import norm
from sklearn.metrics import mean_squared_error

# --- Estimate model RMSE from training data ---
rmse = np.sqrt(mean_squared_error(y, reg.predict(X)))
print(f"\n📏 Estimated RMSE from training data: {round(rmse, 2)} picks")

# --- Define probability function ---
def probability_available_at_pick(predicted_pick, target_pick, error_std):
    """
    Returns the probability a player is available at the given pick,
    assuming a normal distribution of error around their predicted pick.
    """
    return 1 - norm.cdf(target_pick - 1, loc=predicted_pick, scale=error_std)

# --- Prompt user for the pick they want to evaluate ---
target_pick = int(input("🔢 Enter the pick number you're selecting at (e.g. 18): "))

# --- Compute availability probabilities for each player ---
all_mocks_25['Prob_available'] = all_mocks_25['predicted_raw'].apply(
    lambda x: probability_available_at_pick(x, target_pick, rmse)
)

# --- Sort players by model rank (predicted) ---
all_mocks_25.sort_values(by='predicted', inplace=True)

# --- Display results ---
print(f"\n🎯 Probability that each player is available at pick {target_pick}:")
print(
    all_mocks_25[
        ['Player', 'Tm', 'predicted', 'Prob_available']
    ].to_string(index=False)
)



📏 Estimated RMSE from training data: 65.06 picks
🔢 Enter the pick number you're selecting at (e.g. 18): 2

🎯 Probability that each player is available at pick 2:
            Player  Tm  predicted  Prob_available
          Cam Ward TEN        1.0        0.164372
      Abdul Carter CLE        2.0        0.189050
     Travis Hunter NYG        3.0        0.196250
      Mason Graham  NE        4.0        0.249266
     Will Campbell JAX        5.0        0.287709
   Shedeur Sanders  LV        6.0        0.304858
      Jalon Walker NYJ        7.0        0.369667
      Tyler Warren CAR        8.0        0.405893
     Armand Membou  NO        9.0        0.494101
     Ashton Jeanty CHI       10.0        0.502898
      Will Johnson  SF       11.0        0.589058
      Tyler Booker DAL       12.0        0.656254
        Mike Green MIA       13.0        0.660775
    Mykel Williams IND       14.0        0.703868
   Omarion Hampton ATL       15.0        0.712256
      Josh Simmons ARI       16.0    

✅ Do the numbers make sense?
Cam Ward – predicted pick = 1.0

Probability he’s available at pick 2 = ~16.4%

✅ That makes sense — most of the time, he’s expected to go at #1.

Abdul Carter – predicted pick = 2.0

Availability at pick 2 = ~18.9%

✅ He’s expected to go right at #2, so the chance he falls to pick 2 is low (i.e., he would’ve already been taken at 1 in most scenarios).

Travis Hunter – predicted pick = 3.0

Availability at pick 2 = ~19.6%

✅ Slightly more likely than Carter to still be there at 2, because he’s projected slightly later.

As the predicted pick increases (from 4 → 5 → 10 → 15…), the probability of still being available at pick 2 increases — as it should.

**return 1 - norm.cdf(target_pick - 1, loc=predicted_pick, scale=error_std)**
…is the key part that calculates the probability a player has not yet been taken before your desired pick.

x is their predicted pick

target_pick is the user-input pick (e.g. 2, 18, etc.)

rmse is the standard deviation used to simulate uncertainty