In [None]:
from dataclasses import asdict
import lzma
import pickle

import numpy as np
import pandas as pd
import plotly.express as px
from sklearn import linear_model, svm
from sklearn.linear_model import LinearRegression, Perceptron
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.cross_decomposition import PLSCanonical, PLSRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, mean_absolute_error, r2_score, confusion_matrix
from sklearn.neural_network import MLPRegressor, MLPClassifier
# Route every `DataFrame.plot` / `Series.plot` call in this notebook through plotly.
pd.options.plotting.backend = "plotly"

In [None]:
from combat import CombatDataset, CombatUnit

In [None]:
# Path of the LZMA-compressed pickle that holds the combat dataset.
INPUT_PATH = "dataset.xz"

# NOTE(review): unpickling can execute arbitrary code - only open archives
# that come from a trusted source.
with lzma.open(INPUT_PATH, mode="rb") as archive:
    dataset: CombatDataset = pickle.load(archive)

In [None]:
# Show only the first combat record to inspect its structure without dumping the whole list.
dataset.combats[:1]

In [None]:
# One row per combat, built from each combat's `setup` record.
setup_records = [combat.setup for combat in dataset.combats]
setups = pd.DataFrame(setup_records)

In [None]:
def aggregate_field(f, agg, frame=None):
    """Collapse one per-unit field into a per-army scalar column for both sides.

    For every row, the values of field ``f`` are collected from the unit
    records stored in the ``"units"`` and ``"enemy_units"`` columns, reduced
    with ``agg``, and written to the columns ``f`` and ``"enemy_" + f``.

    Parameters
    ----------
    f : str
        Key looked up on every unit record (each unit is indexed as ``u[f]``).
    agg : callable
        Reducer applied to the list of per-unit values, e.g. ``np.mean``.
    frame : pandas.DataFrame, optional
        Frame to update in place. Defaults to the notebook-level ``setups``
        frame, which preserves the original two-argument behaviour.

    Returns
    -------
    None
        The target frame is modified in place; calling again with the same
        arguments simply overwrites the same two columns.
    """
    # Only fall back to the global when no frame is given, so the helper can
    # be reused on any frame that has the same two list-valued columns.
    target = setups if frame is None else frame

    def reduce_army(army):
        return agg([unit[f] for unit in army])

    for source, destination in (("units", f), ("enemy_units", "enemy_" + f)):
        target[destination] = target[source].map(reduce_army)
# DPS and range are averaged over each army; health is summed into one total.
for field_name, reducer in (
    ("ground_dps", np.mean),
    ("air_dps", np.mean),
    ("ground_range", np.mean),
    ("air_range", np.mean),
    ("health", np.sum),
):
    aggregate_field(field_name, reducer)

In [None]:
# One row per combat, built from each combat's `outcome` record (same order as `setups`).
outcome_records = [combat.outcome for combat in dataset.combats]
outcomes = pd.DataFrame(outcome_records)

In [None]:
# Raw `.value` of every unit type, in dataset order; `to_vector` reindexes onto this list.
all_unit_ids = [unit_type.value for unit_type in dataset.unit_types]
def to_vector(units: "list[CombatUnit]", unit_ids=None) -> np.ndarray:
    """Encode an army as one number per unit type.

    Each entry is the summed ``health + shield`` of all units of that type,
    divided by 100. Unit types that do not occur in ``units`` get ``0.0``.

    Parameters
    ----------
    units : list
        Unit records accepted by ``pd.DataFrame`` that provide a ``unit``
        field (an object exposing ``.value``) plus ``health`` and ``shield``.
    unit_ids : sequence, optional
        Unit-type values that define the order and length of the output.
        Defaults to the notebook-level ``all_unit_ids``.

    Returns
    -------
    np.ndarray
        1-D array with ``len(unit_ids)`` entries.
    """
    if unit_ids is None:
        unit_ids = all_unit_ids
    unit_ids = list(unit_ids)

    # An empty army has no "unit" column to group on; it is simply all zeros.
    if len(units) == 0:
        return np.zeros(len(unit_ids))

    df_units = pd.DataFrame(units)
    unit_keys = df_units["unit"].map(lambda v: v.value)

    # Sum only the two columns that are used: summing every unit field would
    # also try to add up non-numeric attributes.
    totals = (
        df_units[["health", "shield"]]
        .groupby(unit_keys)
        .sum()
        .reindex(unit_ids)
        .fillna(0.0)
    )
    vector = (totals["health"] + totals["shield"]) / 100
    return vector.to_numpy()

In [None]:
# army = np.stack([to_vector(c.setup.units) for c in dataset.combats])
# enemy_army = np.stack([to_vector(c.setup.enemy_units) for c in dataset.combats])

In [None]:
# Column-wise join: both frames were built from `dataset.combats` in the same
# order, so rows line up on the default index.
df = pd.concat([setups, outcomes], axis="columns")

In [None]:
# Box plot comparing the distributions of own total health, enemy total health
# and the `winner_health` column (presumably the winner's remaining health - confirm).
df[["health", "enemy_health", "winner_health"]].plot.box()

In [None]:
# Own vs. enemy total health for every combat, coloured by the `win` column.
df.plot.scatter(x="health", y="enemy_health", color="win")

In [None]:
# Histogram of the regression target `result`, with bars coloured by `win`.
df.plot.hist(x="result", color="win")

In [None]:

# Earlier feature encodings based on per-unit-type army vectors, kept for reference:
# X = np.concatenate((army, enemy_army), 1)
# X = np.stack([np.outer(a, b).flatten() for a, b in zip(army, enemy_army)])

# Features: every setup column except the two raw unit lists.
X = setups.drop(["units", "enemy_units"], axis="columns")
# Target: the `result` column of the joined frame.
y = df["result"].to_numpy()

# Seeded 80/20 split so the held-out rows are the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_test

In [None]:
def train(exponent, random_state=42):
    """Fit a one-hidden-unit MLP regressor and predict the held-out split.

    The ``health`` and ``enemy_health`` features are raised to ``exponent``
    before fitting; all other feature columns are passed through unchanged.
    The model is trained on the notebook-level ``X_train`` / ``y_train`` and
    evaluated on ``X_test``.

    Parameters
    ----------
    exponent : float
        Power applied to both health columns.
    random_state : int, optional
        Seed for the network's weight initialisation. Fixing it keeps results
        reproducible, so differences between exponents are not confounded by
        initialisation noise.

    Returns
    -------
    np.ndarray
        Predictions for ``X_test``.
    """
    health_columns = ["health", "enemy_health"]

    def transform_health(features):
        # Same layout as before: untouched columns first, then the two
        # powered health columns appended at the end.
        powered = features[health_columns] ** exponent
        return pd.concat(
            [features.drop(columns=health_columns), powered],
            axis="columns",
        )

    # Alternative tried earlier:
    # model = MLPClassifier(solver="lbfgs", max_iter=1000, random_state=42)
    model = MLPRegressor(
        hidden_layer_sizes=[1],
        solver="lbfgs",
        random_state=random_state,
    )
    model.fit(transform_health(X_train), y_train)
    return model.predict(transform_health(X_test))

In [None]:
# Actual (x) vs. predicted (y) result on the held-out split, with health squared.
px.scatter(x=y_test, y=train(2))

In [None]:
# Sweep the health exponent over [0, 3] and record the test MAE for each value.
# Every step refits the model, so this cell performs 100 trainings.
exponents = np.linspace(0, 3, 100)
errors = []
for exponent in exponents:
    errors.append(mean_absolute_error(y_test, train(exponent)))
px.scatter(x=exponents, y=errors)

In [None]:
# Refit with exponent 2 and keep the test-set predictions for the metric cells that follow.
y_pred = train(2)

In [None]:
# Mean absolute error of the predictions, in the same units as `result`.
mean_absolute_error(y_test, y_pred)

In [None]:
# F1 on the sign of the target: truth and prediction are both thresholded at zero
# (presumably result > 0 means a win - confirm against the dataset definition).
f1_score(y_test > 0, y_pred > 0)

In [None]:
# Coefficient of determination of the regression on the held-out split.
r2_score(y_test, y_pred)

In [None]:
# Confusion matrix for the sign of the result; normalize="all" scales the cells to sum to 1.
confusion_matrix(y_test > 0, y_pred > 0, normalize="all")

In [None]:
# NOTE(review): `features` is not assigned anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel - confirm which array was meant to be shown.
px.imshow(features)

In [None]:
# NOTE(review): in the visible cells `model` only exists as a local variable inside
# `train`, so this cell fails with NameError on a fresh kernel - confirm where the
# fitted model is supposed to come from.
# First-layer weight matrix of the fitted network.
weights = model.coefs_[0]
# Assumes the inputs were the concatenated per-unit-type vectors (own army, then enemy),
# so the weights split into two halves of len(dataset.unit_types) each - TODO confirm;
# the current `X` is built from aggregated setup columns instead.
model_summary = pd.DataFrame(
    weights.reshape((2, -1)).T,
    index=dataset.unit_types,
    columns=["self", "enemy"],
)
model_summary