In [1]:
import math
import numpy as np
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score

from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Binarizer

import warnings
warnings.filterwarnings("ignore")

In [2]:
dnd_data = pd.read_csv("../Data/dnd.csv")

In [3]:
# Remove extra columns.
to_drop = [
    "languages", "speed", "swim", "fly", "climb", "burrow",
    "passive_perception", "darkvision", "truesight", "tremorsense", "blindsight",
    "str_mod", "dex_mod", "con_mod", "int_mod", "wis_mod", "cha_mod",
    "str_save", "dex_save", "con_save", "int_save", "wis_save", "cha_save",
    "history", "perception", "stealth", "persuasion", "insight", "deception", "arcana", "religion", "acrobatics", "athletics", "intimidation",
    "senses", "attributes", "actions", "legendary_actions", "source",
]
dnd_clean = dnd_data.drop(to_drop, axis=1)

# Clean the creature types.
for index_itr in dnd_clean.index:
    test = dnd_clean.loc[index_itr, "type"].split(" (", 1)[0]
    dnd_clean.loc[index_itr, "type"] = test

# Turn the CR floats to ints.
for index_itr in dnd_clean.index:
    dnd_clean.loc[index_itr, "cr"] = math.floor(dnd_clean.loc[index_itr, "cr"])
dnd_clean["cr"] = dnd_clean["cr"].astype(int)

# Split the alignments.
algn_map = {
    "any alignment": ("any", "any"),
    "any chaotic alignment": ("chaotic", "any"),
    "any evil alignment": ("any", "evil"),
    "any non-good alignment": ("any", "non-good"),
    "any non-lawful alignment": ("non-lawful", "any"),
    "chaotic good (75 %) or neutral evil (25 %)": ("non-lawful", "non-neutral"),
    "neutral good (50 %) or neutral evil (50 %)": ("neutral", "non-neutral"),
}
for index_itr in dnd_clean.index:
    if dnd_clean.loc[index_itr, "alignment"] in algn_map.keys():
        dnd_clean.loc[index_itr, "law"] = algn_map[dnd_clean.loc[index_itr, "alignment"]][0]
        dnd_clean.loc[index_itr, "moral"] = algn_map[dnd_clean.loc[index_itr, "alignment"]][1]
    elif " " in dnd_clean.loc[index_itr, "alignment"]:
        dnd_clean.loc[index_itr, "law"] = dnd_clean.loc[index_itr, "alignment"].split(" ")[0]
        dnd_clean.loc[index_itr, "moral"] = dnd_clean.loc[index_itr, "alignment"].split(" ")[1]
    else:
        dnd_clean.loc[index_itr, "law"] = dnd_clean.loc[index_itr, "alignment"]
        dnd_clean.loc[index_itr, "moral"] = dnd_clean.loc[index_itr, "alignment"]
        
# Reset alignment to new alignment types.
for index_itr in dnd_clean.index:
    dnd_clean.loc[index_itr, "alignment"] = dnd_clean.loc[index_itr, "law"] + " " + dnd_clean.loc[index_itr, "moral"]

# Turn the AC floats to ints.
dnd_clean["ac"] = dnd_clean["ac"].astype(int)

# Rename columns.
dnd_clean = dnd_clean.rename(columns={"alignment": "align", "intel": "int", "legendary": "legen", "strength": "str"})    

dnd_clean.head()

Unnamed: 0,name,size,type,align,ac,hp,cr,str,dex,con,int,wis,cha,legen,law,moral
0,aarakocra,medium,humanoid,neutral good,12,13,0,10,14,10,11,12,11,0,neutral,good
1,abjurer,medium,humanoid,any any,12,84,9,9,14,14,18,12,11,0,any,any
2,aboleth,large,aberration,lawful evil,17,135,10,21,9,15,18,15,18,1,lawful,evil
3,abominable-yeti,huge,monstrosity,chaotic evil,15,137,9,24,10,22,9,13,9,0,chaotic,evil
4,acererak,medium,undead,neutral evil,21,285,23,13,16,20,27,21,20,0,neutral,evil


In [4]:
def uniq_list(target_col):
    return list(set(list(dnd_clean[target_col])))

# Create an ordinal size column for size and type.
uniq_align = uniq_list("align")
uniq_size = uniq_list("size")
uniq_type = uniq_list("type")
uniq_law = uniq_list("law")
uniq_moral = uniq_list("moral")

for index_itr in dnd_clean.index:
    dnd_clean.loc[index_itr, "nalign"] = uniq_align.index(dnd_clean.loc[index_itr, "align"])
    dnd_clean.loc[index_itr, "nsize"] = uniq_size.index(dnd_clean.loc[index_itr, "size"])
    dnd_clean.loc[index_itr, "ntype"] = uniq_type.index(dnd_clean.loc[index_itr, "type"])
    dnd_clean.loc[index_itr, "nlaw"] = uniq_law.index(dnd_clean.loc[index_itr, "law"])
    dnd_clean.loc[index_itr, "nmoral"] = uniq_moral.index(dnd_clean.loc[index_itr, "moral"])

dnd_clean["nalign"] = dnd_clean["nalign"].astype(np.uint8)
dnd_clean["nsize"] = dnd_clean["nsize"].astype(np.uint8)
dnd_clean["ntype"] = dnd_clean["ntype"].astype(np.uint8)
dnd_clean["nlaw"] = dnd_clean["nlaw"].astype(np.uint8)
dnd_clean["nmoral"] = dnd_clean["nmoral"].astype(np.uint8)

dnd_clean.head()

Unnamed: 0,name,size,type,align,ac,hp,cr,str,dex,con,...,wis,cha,legen,law,moral,nalign,nsize,ntype,nlaw,nmoral
0,aarakocra,medium,humanoid,neutral good,12,13,0,10,14,10,...,12,11,0,neutral,good,9,4,5,0,4
1,abjurer,medium,humanoid,any any,12,84,9,9,14,14,...,12,11,0,any,any,15,4,5,1,1
2,aboleth,large,aberration,lawful evil,17,135,10,21,9,15,...,15,18,1,lawful,evil,14,1,10,5,6
3,abominable-yeti,huge,monstrosity,chaotic evil,15,137,9,24,10,22,...,13,9,0,chaotic,evil,10,5,0,4,6
4,acererak,medium,undead,neutral evil,21,285,23,13,16,20,...,21,20,0,neutral,evil,13,4,8,0,6


In [5]:
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('binirizer', Binarizer())
])

# Get the list without the item.
def not_target(not_get):
    default_list = ["ac", "hp", "cr", "str", "dex", "con", "int", "wis", "cha", "legen", "nsize", "nlaw", "nmoral"]
    # Account for cheaty data.
    if not_get == "nalign":
        default_list.remove("nlaw")
        default_list.remove("nmoral")
    else:
        default_list.remove(not_get)
    return default_list

# Get all columns not the target, and the target, and split.
def dnd_prep(target_get):
    dnd_x = dnd_clean[dnd_clean.columns & not_target(target_get)].to_numpy()
    dnd_y = dnd_clean[target_get].to_numpy()
    dnd_xtrain, dnd_xtest, dnd_ytrain, dnd_ytest = train_test_split(dnd_x, dnd_y, test_size=0.2, random_state=42)
    return pipe.fit_transform(dnd_xtrain), pipe.fit_transform(dnd_xtest), dnd_ytrain, dnd_ytest

In [6]:
# Train 4 various models on the target stat and evaluate.
def giga_grid(target_get):
    dnd_xtrain, dnd_xtest, dnd_ytrain, dnd_ytest = dnd_prep(target_get)
    
    knc_grid = [{"weights":["uniform", "distance"], "metric":["euclidean", "manhattan"], "algorithm":["ball_tree", "kd_tree", "brute"], "leaf_size":[30, 40, 50, 60]}]
    knc_model = KNeighborsClassifier()
    knc_search = GridSearchCV(knc_model, knc_grid, cv=5, scoring="accuracy")
    
    rfc_grid = [{"criterion":["gini", "entropy"], "n_estimators":[25, 50, 75, 100], "max_depth":[10, 15, 20, 25], "min_samples_split":[4, 6, 8, 10]}]
    rfc_model = RandomForestClassifier()
    rfc_search = GridSearchCV(rfc_model, rfc_grid, cv=5, scoring="accuracy")
    
    sgd_grid = [{"penalty":["l2", "l1", "elasticnet"], "alpha":[1e-4, 1e-3, 1e-2, 1e-1, 1e0]}]
    sgd_model = SGDClassifier()
    sgd_search = GridSearchCV(sgd_model, sgd_grid, cv=5, scoring="accuracy")
    
    svc_grid = [{"kernel":["linear", "rbf", "sigmoid"], "C":[0.1, 1, 10, 100, 1000], "gamma":[1, 0.1, 0.01, 0.001, 0.0001]}]
    svc_model = SVC()
    svc_search = GridSearchCV(svc_model, svc_grid, cv=5, scoring="accuracy")
    
    model_list = [rfc_search, knc_search, sgd_search, svc_search]
    model_best = []
    score_best = []
    
    for itr_model in model_list:
        itr_model.fit(dnd_xtrain, dnd_ytrain)
        model_best.append(itr_model.best_estimator_)
        score_best.append(itr_model.best_score_)
        
    return model_best, score_best, dnd_xtest, dnd_ytest

In [None]:
# BEGIN THE DATA CHUNKING!

hit_list = ["ac", "hp", "cr", "str", "dex", "con", "int", "wis", "cha", "legen", "nalign", "nsize", "ntype", "nlaw", "nmoral"]

for itr_hit in hit_list:
    model_best, score_best, dnd_xtest, dnd_ytest = giga_grid(itr_hit)
    print("##########  ", itr_hit, "  ##########\n")
    print(str(score_best) + "\n" + str(model_best) + "\n")
    target_score = max(score_best)
    target_model = model_best[score_best.index(target_score)]
    print("{:.2f}".format(target_score*100) + "%:", str(target_model))
    y_hat = target_model.predict(dnd_xtest)
    print("Accuracy against unseen data:", "{:.2f}".format(accuracy_score(dnd_ytest, y_hat)*100) + "%")
    print()