In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import joblib

# 1. Load Dataset
df = pd.read_csv('magnus_carlsen.csv')
df['date'] = pd.to_datetime(df['date'])
# The "previous result" feature below is built with shift(1), which only means
# "the game before" when rows are in chronological order. Sort explicitly
# instead of relying on the CSV's row order; mergesort is stable, so games
# sharing a date keep their original file order.
df = df.sort_values('date', kind='mergesort').reset_index(drop=True)

# 2. Feature Engineering
# Encode categorical data into numbers. Each column gets its own encoder:
# fit_transform() refits from scratch, so sharing a single LabelEncoder would
# throw away the colour mapping as soon as the result column is encoded.
color_encoder = LabelEncoder()
df['color_numeric'] = color_encoder.fit_transform(df['player_color'])

# Rows without a usable previous result (the first row, or a row following a
# missing result) are filled with 'Draw' so the encoder never sees NaN.
result_encoder = LabelEncoder()
previous_result = df['result'].shift(1).fillna('Draw')
df['prev_result_numeric'] = result_encoder.fit_transform(previous_result)

# Kept for backward compatibility: this cell previously left `le` fitted on
# the previous-result labels, and later cells may still read it.
le = result_encoder

# 3. Define Features (X) and Target (y)
# The target stays as the raw 'result' labels; only the inputs are encoded.
feature_columns = [
    'player_rating',
    'opponent_rating',
    'color_numeric',
    'prev_result_numeric',
]
X = df[feature_columns]
y = df['result']

# 4. Train/Test Split (80% Training, 20% Testing)
# random_state pins the shuffle so the split is reproducible. The held-out
# part (X_test, y_test) is not evaluated in this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 5. Build and Train the Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 6. Save the Model for Web App
joblib.dump(model, 'magnus_model.pkl')
# Also persist the fitted encoders in a separate file, so whatever loads the
# model can turn raw colour / result strings into the same integer codes the
# model was trained on. The model file itself is unchanged.
joblib.dump(
    {'player_color': color_encoder, 'prev_result': result_encoder},
    'magnus_encoders.pkl',
)
print("Model trained and saved as 'magnus_model.pkl' successfully!")

Model trained and saved as 'magnus_model.pkl' successfully!
