In [None]:
import pandas as pd
import numpy as np 
import csv 
import os
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.impute import KNNImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Define a function to parse the rows, ignoring the number after the team name
def parse_row(row):
    """Parse one tab-delimited ratings line into a flat record.

    Parameters
    ----------
    row : str
        A single line with at least 13 tab-separated fields, in this order:
        rank, team name (optionally followed by a number), conference,
        "W-L" record, then nine rating fields.

    Returns
    -------
    dict
        Keys 'Rank', 'Team', 'Conference', 'Wins', 'Losses', 'Win%' and the
        nine rating columns. 'Wins' and 'Losses' are ints, 'Win%' is a float
        fraction in [0, 1], and every other value is the raw string from the
        line (numeric conversion is left to the caller).

    Raises
    ------
    IndexError
        If the line has fewer than 13 fields or the record has no '-'.
    ValueError
        If either side of the "W-L" record is not an integer.
    """
    # Split the row by tabs since it is a tab-delimited file
    fields = row.strip().split("\t")

    rank = fields[0]

    # Drop the number after the team name, but only when the last token
    # really is a number; otherwise a name without one would lose its final
    # word (or vanish entirely for a one-word name).
    name_tokens = fields[1].split()
    if name_tokens and name_tokens[-1].isdigit():
        name_tokens = name_tokens[:-1]
    team = ' '.join(name_tokens)

    conf = fields[2]

    record_parts = fields[3].split('-')
    wins = int(record_parts[0])
    losses = int(record_parts[1])

    # Keep Win% on a single 0-1 scale for every row: an undefeated team is
    # 1.0, and a team with no games played is reported as 0.0 rather than
    # dividing by zero.
    games_played = wins + losses
    win_pct = wins / games_played if games_played else 0.0

    return {
        'Rank': rank,
        'Team': team,
        'Conference': conf,
        'Wins': wins,
        'Losses': losses,
        'Win%': win_pct,
        'AdjEM': fields[4],
        'Offensive Effeciency': fields[5],
        'Defensive Effeciency': fields[6],
        'Tempo': fields[7],
        'Luck': fields[8],
        'Schedule Strength': fields[9],
        'Opponent Offensive Effeciency': fields[10],
        'Opponent Defensive Effeciency': fields[11],
        'Non Confrence Schedule Strength': fields[12],
    }


In [None]:
def read_file(filename):
    """Load a tab-delimited ratings file into a DataFrame.

    The first line of the file is treated as a header and skipped. Every
    remaining non-blank line is handed to ``parse_row`` and the resulting
    records become the rows of the returned frame.

    Parameters
    ----------
    filename : str or path-like
        Path of the text file to read.

    Returns
    -------
    pandas.DataFrame
        One row per parsed line, with the columns produced by ``parse_row``.
    """
    with open(filename) as f:
        raw_lines = f.readlines()

    # Blank lines (for example a trailing newline at the end of the file)
    # carry no fields and would make parse_row raise IndexError, so they are
    # filtered out before parsing.
    parsed_data = [parse_row(line) for line in raw_lines[1:] if line.strip()]
    return pd.DataFrame(parsed_data)

In [None]:
# Load one ratings table per season and tag every row with its season year.
# All seasons keep identical column names so the frames stack cleanly below.
data19 = read_file('data19.txt').assign(Year=2019)
data20 = read_file('data20.txt').assign(Year=2020)
data21 = read_file('data21.txt').assign(Year=2021)
data22 = read_file('data22.txt').assign(Year=2022)
data23 = read_file('data23.txt').assign(Year=2023)

# Stack the seasons into a single long table with a fresh 0..n-1 index.
combined = pd.concat([data19, data20, data21, data22, data23], ignore_index=True)

numeric_columns = ['Rank','AdjEM','Offensive Effeciency','Defensive Effeciency','Tempo','Luck','Schedule Strength','Opponent Offensive Effeciency','Opponent Defensive Effeciency','Non Confrence Schedule Strength']

# parse_row leaves these fields as raw strings; convert them to numbers.
# Anything that cannot be parsed becomes NaN and is filled by the imputer below.
for column in numeric_columns:
    combined[column] = pd.to_numeric(combined[column], errors='coerce')
combined['Team'] = combined['Team'].astype(str)
combined['Year'] = combined['Year'].astype(int)

# Fill the NaNs in the numeric columns using KNNImputer with 5 neighbours.
imputer = KNNImputer(n_neighbors=5)
combined[numeric_columns] = imputer.fit_transform(combined[numeric_columns])


In [152]:
# Five weights, one per season, that sum to 1.0.
# NOTE(review): presumably ordered most-recent-season first — confirm against
# the code that consumes this list.
base_weights = [0.4, 0.3, 0.15, 0.1, 0.05]  # The weights for the 5 most recent years

# Starts empty; presumably filled with one weighted win figure per team —
# TODO confirm against the code that populates it.
team_weighted_win = {}
