In [1]:
import json
import zipfile
import pandas as pd
import numpy as np
from tqdm import tqdm
from joblib import dump
import os
import logging
import time
from datetime import datetime

# Set up logging: records at WARNING level and above are written to a log file
# instead of the notebook output, so skipped games do not flood the cell output.
# NOTE(review): logging.basicConfig does nothing if the root logger already has
# handlers — confirm the file is actually created in this kernel.
logging.basicConfig(
    filename="data_loading_errors.log",
    level=logging.WARNING,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# File paths (relative to the notebook's working directory)
zip_path = "data/dota_games.zip"  # archive whose .json members are individual games
heroes_json_path = "data/heroes.json"  # list of hero records with "id" and "api_name"
model_save_dir = "models/"  # read as a module-level global by ManualLogisticRegression.save_model
games_to_process = None  # Set to None for all games; an int N keeps only the first N files

# Ensure model save directory exists
os.makedirs(model_save_dir, exist_ok=True)


In [2]:
class ManualLogisticRegression:
    """Binary logistic regression fitted with full-batch gradient descent.

    The model learns one weight per feature plus a bias and minimises the
    mean binary cross-entropy. It is untrained (``weights is None``) until
    ``train`` has been called.
    """

    def __init__(self, learning_rate=0.01, epochs=1000):
        """Store the hyper-parameters.

        Args:
            learning_rate: Step size applied to every gradient update.
            epochs: Number of full passes over the training data.
        """
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None  # 1-D array of per-feature weights, set by train()
        self.bias = 0
        self.loss_history = []  # one loss value per epoch of the most recent train() call

    @staticmethod
    def sigmoid(z):
        """Return the logistic function ``1 / (1 + exp(-z))`` without overflow.

        ``exp`` is only ever evaluated on non-positive values, so large
        negative inputs no longer trigger an overflow in ``np.exp``.

        Args:
            z: Scalar or array-like of real values.

        Returns:
            A float (for scalar input) or an array shaped like ``z``.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))  # always within [0, 1]
        result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # ``[()]`` unwraps a 0-d array to a scalar and leaves n-d arrays alone.
        return result[()]

    @staticmethod
    def compute_loss(y_true, y_pred):
        """Return the mean binary cross-entropy between labels and probabilities.

        A small epsilon is added inside each logarithm so that predictions of
        exactly 0 or 1 do not produce ``log(0)``.

        Args:
            y_true: Array of 0/1 labels.
            y_pred: Array of predicted probabilities, same length as ``y_true``.
        """
        eps = 1e-9
        m = len(y_true)
        loss = -np.sum(y_true * np.log(y_pred + eps) + (1 - y_true) * np.log(1 - y_pred + eps)) / m
        return loss

    def train(self, X, y):
        """Fit weights and bias from scratch on ``X`` / ``y``.

        Every call restarts from zero weights and clears ``loss_history`` so
        the history (and the loss later embedded in the saved file name)
        describes only the latest run.

        Args:
            X: 2-D array-like of shape ``(n_samples, n_features)``.
            y: 1-D array-like of ``n_samples`` 0/1 labels.

        Raises:
            ValueError: If ``X`` is empty or ``y`` is not a 1-D vector with one
                label per row of ``X``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("Cannot train on an empty dataset.")
        if y.shape != (n_samples,):
            # A (n, 1) target would silently broadcast to (n, n) below.
            raise ValueError(
                f"y must have shape ({n_samples},) to match X, got {y.shape}."
            )

        self.weights = np.zeros(n_features)
        self.bias = 0
        self.loss_history = []

        for epoch in tqdm(range(self.epochs), desc="Training Model"):
            linear_pred = np.dot(X, self.weights) + self.bias
            y_pred = self.sigmoid(linear_pred)

            # Gradients of the mean cross-entropy w.r.t. weights and bias.
            error = y_pred - y
            dw = np.dot(X.T, error) / n_samples
            db = np.sum(error) / n_samples

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            # Loss is recorded for the predictions made before this update.
            loss = self.compute_loss(y, y_pred)
            self.loss_history.append(loss)

    def predict_proba(self, X):
        """Return the predicted probability of the positive class for each row.

        Raises:
            RuntimeError: If the model has not been trained yet.
        """
        if self.weights is None:
            raise RuntimeError("Model is not trained; call train() first.")
        linear_pred = np.dot(X, self.weights) + self.bias
        return self.sigmoid(linear_pred)

    def predict(self, X):
        """Return a list of 0/1 predictions (1 when the probability exceeds 0.5)."""
        probabilities = self.predict_proba(X)
        return [1 if p > 0.5 else 0 for p in probabilities]

    def save_model(self, X_columns, accuracy, save_dir=None):
        """Persist weights, bias and feature columns with joblib.

        The file name embeds the epoch count, the given accuracy, the final
        training loss and a minute-resolution timestamp.

        Args:
            X_columns: Feature names in the order used for training.
            accuracy: Accuracy value to embed in the file name.
            save_dir: Target directory. Defaults to the module-level
                ``model_save_dir`` when omitted.

        Returns:
            The path of the written file.

        Raises:
            RuntimeError: If the model has not been trained yet.
        """
        if self.weights is None or not self.loss_history:
            raise RuntimeError("Cannot save an untrained model; call train() first.")

        target_dir = model_save_dir if save_dir is None else save_dir
        os.makedirs(target_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M")
        final_loss = self.loss_history[-1]
        filename = f"manual_logreg_epoch{self.epochs}_acc{accuracy:.2f}_loss{final_loss:.2f}_{timestamp}.joblib"
        model_path = os.path.join(target_dir, filename)
        dump({"weights": self.weights, "bias": self.bias, "columns": X_columns}, model_path)
        print(f"Model saved to {model_path}")
        return model_path


In [3]:
class DraftAssistant:
    """Loads archived matches, encodes drafts as features and fits a win model.

    Typical order of calls: ``load_hero_names`` -> ``load_games`` ->
    ``preprocess_data`` -> ``train_model``.
    """

    def __init__(self, zip_path, heroes_json_path, games_to_process=None):
        """Remember the input locations; nothing is read until the loaders run.

        Args:
            zip_path: Path to the zip archive containing one JSON file per game.
            heroes_json_path: Path to the JSON list of hero records.
            games_to_process: Optional cap on the number of archive members to
                read; a falsy value means all of them.
        """
        self.zip_path = zip_path
        self.heroes_json_path = heroes_json_path
        self.games_to_process = games_to_process
        self.hero_mapping = None  # {hero id: api name}, set by load_hero_names()
        self.X_columns = None  # feature column labels, set by preprocess_data()
        self.model = None  # ManualLogisticRegression, set by train_model()
        self.data = None  # (list of feature dicts, list of labels), set by load_games()

    def load_hero_names(self):
        """Read the hero list and build the ``id -> api_name`` mapping."""
        # JSON text is read as UTF-8 explicitly rather than with the platform default.
        with open(self.heroes_json_path, "r", encoding="utf-8") as f:
            heroes = json.load(f)
        self.hero_mapping = {hero["id"]: hero["api_name"] for hero in heroes}
        print(f"Loaded {len(self.hero_mapping)} heroes.")

    @staticmethod
    def _encode_match(result):
        """Turn one game's ``result`` record into ``(features, label)``.

        Features map ``hero_<id>`` to ``1`` for players whose ``player_slot``
        is below 128 (treated as Radiant) and ``-1`` otherwise (Dire). The
        label is ``1`` when ``radiant_win`` is truthy, else ``0``.

        Raises:
            KeyError, TypeError: If the record lacks an expected field or has
                an unexpected shape.
        """
        radiant_heroes = []
        dire_heroes = []
        for player in result["players"]:
            if player["player_slot"] < 128:
                radiant_heroes.append(player["hero_id"])
            else:
                dire_heroes.append(player["hero_id"])

        # Radiant keys are inserted first; Dire entries override on a clash.
        match_features = {f"hero_{hero_id}": 1 for hero_id in radiant_heroes}
        match_features.update({f"hero_{hero_id}": -1 for hero_id in dire_heroes})
        label = 1 if result["radiant_win"] else 0
        return match_features, label

    def load_games(self):
        """Parse every ``.json`` member of the archive into features and labels.

        Games that cannot be read or decoded are logged as warnings and
        skipped. Features and label are only stored once both have been
        produced, so the two lists in ``self.data`` always have equal length.
        """
        X, y = [], []
        with zipfile.ZipFile(self.zip_path, "r") as zip_file:
            json_files = [name for name in zip_file.namelist() if name.endswith(".json")]
            if self.games_to_process:
                json_files = json_files[:self.games_to_process]

            for file_name in tqdm(json_files, desc="Loading Games"):
                try:
                    with zip_file.open(file_name, "r") as f:
                        game_data = json.load(f)
                    if "result" not in game_data or "players" not in game_data["result"]:
                        logging.warning(f"Missing 'players' in {file_name}")
                        continue
                    match_features, label = self._encode_match(game_data["result"])
                except (KeyError, ValueError, TypeError, zipfile.BadZipFile) as e:
                    # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
                    # BadZipFile covers a corrupt archive member. One bad file
                    # must not abort a load of the whole archive.
                    logging.warning(f"Error processing {file_name}: {e}")
                    continue

                X.append(match_features)
                y.append(label)
        self.data = (X, y)
        print(f"Loaded {len(y)} games.")

    def preprocess_data(self):
        """Convert the loaded games into a dense feature matrix and label vector.

        Heroes absent from a game are filled with 0. The resulting column
        labels are stored in ``self.X_columns``.

        Returns:
            Tuple ``(X, y)`` of a 2-D array and a 1-D array.

        Raises:
            RuntimeError: If ``load_games`` has not been called yet.
        """
        if self.data is None:
            raise RuntimeError("No games loaded; call load_games() first.")
        X_raw, y = self.data
        df = pd.DataFrame(X_raw).fillna(0)
        self.X_columns = df.columns
        return df.values, np.array(y)

    def train_model(self, X, y, learning_rate=0.05, epochs=500):
        """Fit a ``ManualLogisticRegression`` and report accuracy on the same data.

        Args:
            X: Feature matrix as returned by ``preprocess_data``.
            y: Label vector as returned by ``preprocess_data``.
            learning_rate: Gradient-descent step size.
            epochs: Number of training epochs.

        Returns:
            Fraction of training rows predicted correctly (not a held-out score).
        """
        print("Training model...")
        self.model = ManualLogisticRegression(learning_rate=learning_rate, epochs=epochs)
        self.model.train(X, y)
        y_pred = np.asarray(self.model.predict(X))
        accuracy = np.mean(y == y_pred)
        print(f"Training Accuracy: {accuracy:.2f}")
        return accuracy


In [4]:
# Build the assistant from the paths/limit defined in the configuration cell,
# then read the hero list so hero ids can later be translated to names.
assistant = DraftAssistant(zip_path, heroes_json_path, games_to_process)
assistant.load_hero_names()


Loaded 122 heroes.


In [None]:
# Parse the .json members of the archive into assistant.data (features, labels).
# Long-running: the recorded progress bar below shows roughly 2.3M entries.
# Games that fail to parse are logged as warnings and skipped.
assistant.load_games()

Loading Games:  16%|█████████▊                                                  | 382778/2338043 [01:28<07:18, 4461.96it/s]

In [None]:
# Turn the per-game feature dicts into a dense matrix (one column per hero seen,
# 0 where a hero was not picked) and the labels into a NumPy vector.
X, y = assistant.preprocess_data()
print(f"Processed data shape: {X.shape}, Target size: {len(y)}")


In [None]:
# Fit the model and persist it. `accuracy` is measured on the training data
# itself (train_model predicts on the same X it was fitted on), so it is not a
# held-out estimate. The saved file also stores the feature column labels.
accuracy = assistant.train_model(X, y, learning_rate=0.05, epochs=500)
assistant.model.save_model(assistant.X_columns, accuracy)


In [None]:
def predict_win_probability(model, radiant_heroes, dire_heroes, feature_columns):
    """Score a (possibly partial) draft and return the model's output for it.

    The draft is encoded the same way as the training rows: ``hero_<id>`` is
    ``1`` for Radiant picks and ``-1`` for Dire picks. The row is then aligned
    to ``feature_columns``; heroes without a matching column are dropped and
    columns without a pick are filled with 0.

    Args:
        model: Object exposing ``predict_proba(matrix)``.
        radiant_heroes: Iterable of hero ids picked by Radiant.
        dire_heroes: Iterable of hero ids picked by Dire.
        feature_columns: Column labels, in the order the model expects.

    Returns:
        The first (only) element of ``model.predict_proba`` for the draft row.
    """
    encoded = {}
    for hero_id in radiant_heroes:
        encoded[f"hero_{hero_id}"] = 1
    # Dire is written second so it wins if an id appears on both sides.
    for hero_id in dire_heroes:
        encoded[f"hero_{hero_id}"] = -1

    single_row = pd.DataFrame([encoded])
    aligned = single_row.reindex(columns=feature_columns, fill_value=0)
    scores = model.predict_proba(aligned.values)
    return scores[0]

# Example draft: a partial pick list for each side, given as hero ids.
# Any id without a matching "hero_<id>" feature column is dropped by the
# reindex inside predict_win_probability and does not affect the score.
radiant_heroes = [7, 9, 44]
dire_heroes = [8, 4, 129]
win_prob = predict_win_probability(assistant.model, radiant_heroes, dire_heroes, assistant.X_columns)
print(f"Win Probability for Radiant: {win_prob:.2f}")


In [None]:
def recommend_next_hero(model, radiant_heroes, dire_heroes, feature_columns, hero_mapping):
    """Pick the unpicked hero that maximises Radiant's predicted win probability.

    Each hero id in ``hero_mapping`` that is not already on either side is
    tentatively appended to the Radiant draft and scored with
    ``predict_win_probability``.

    Args:
        model: Trained model passed through to ``predict_win_probability``.
        radiant_heroes: List of hero ids already picked by Radiant.
        dire_heroes: Hero ids already picked by Dire.
        feature_columns: Column labels the model expects.
        hero_mapping: Mapping whose keys are all known hero ids.

    Returns:
        Tuple ``(hero_id, probability)``. It is ``(None, 0)`` when no candidate
        scores strictly above 0, e.g. when no hero is left.
    """
    candidates = set(hero_mapping.keys()) - set(radiant_heroes) - set(dire_heroes)

    top_hero, top_prob = None, 0
    for candidate in candidates:
        candidate_prob = predict_win_probability(
            model, radiant_heroes + [candidate], dire_heroes, feature_columns
        )
        # Strictly greater: the first candidate seen keeps the lead on ties.
        if candidate_prob > top_prob:
            top_hero, top_prob = candidate, candidate_prob

    return top_hero, top_prob

# Example usage: suggest the next Radiant pick for the draft defined above.
# NOTE(review): recommend_next_hero returns (None, 0) when no candidate scores
# above 0; in that case the hero_mapping[best_hero] lookup below raises KeyError.
best_hero, updated_win_prob = recommend_next_hero(
    assistant.model, radiant_heroes, dire_heroes, assistant.X_columns, assistant.hero_mapping
)
print(f"Recommended Hero: {assistant.hero_mapping[best_hero]} (Updated Win Probability: {updated_win_prob:.2f})")
