In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss, accuracy_score
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

# Loading data and getting columns needed

In [2]:
# Relative path of the raw character-stats CSV.
path = 'data/RpgCharStats.csv'

# Parse the CSV into the working DataFrame; the bare `df` on the last line
# makes the notebook render it as this cell's output.
df = pd.read_csv(filepath_or_buffer=path)
df

Unnamed: 0,Armor,Weapon,Physical,Magic,Level,FBoss,Class
0,45,83,45,89,77,True,Magician
1,48,88,82,65,71,True,Warrior
2,65,22,71,4,37,False,Tank
3,68,3,7,91,39,False,Sorcerer
4,68,9,49,21,27,False,Tank
...,...,...,...,...,...,...,...
995,27,43,73,90,67,True,Battlemage
996,49,69,81,30,50,True,Knight
997,72,63,19,72,61,True,Magician
998,55,37,75,27,66,False,Battlemage


The dataset contains a categorical column, `Class`. If we want to include it as a feature, we should convert it to numerical values.
1. See all unique values for categorical variables
2. Check if there's any imbalance between the categorical values (i.e., whether some category appears much more often than the others)

In [3]:
# Count how many rows fall into each character class. The result lists every
# distinct class (so a separate `.unique()` call is unnecessary) and lets us
# judge whether the classes are roughly balanced.
df.loc[:, 'Class'].value_counts()

Class
Warrior       183
Magician      170
Sorcerer      169
Knight        169
Battlemage    158
Tank          151
Name: count, dtype: int64

In [4]:
# One-hot encode the categorical `Class` column into indicator columns.
# `drop_first=True` omits the indicator for the first category so the
# remaining indicators are not perfectly collinear: a row with all of them
# equal to 0 belongs to the dropped category.
# The trailing `.astype(int)` is applied to the whole frame, so the new
# indicator columns and the boolean `FBoss` column both end up as 0/1 integers.
df = (
    pd.get_dummies(df, columns=['Class'], drop_first=True)
    .astype(int)
)

In [5]:
# Display the frame to inspect the result of the one-hot encoding.
df

Unnamed: 0,Armor,Weapon,Physical,Magic,Level,FBoss,Class_Knight,Class_Magician,Class_Sorcerer,Class_Tank,Class_Warrior
0,45,83,45,89,77,1,0,1,0,0,0
1,48,88,82,65,71,1,0,0,0,0,1
2,65,22,71,4,37,0,0,0,0,1,0
3,68,3,7,91,39,0,0,0,1,0,0
4,68,9,49,21,27,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
995,27,43,73,90,67,1,0,0,0,0,0
996,49,69,81,30,50,1,1,0,0,0,0
997,72,63,19,72,61,1,0,1,0,0,0
998,55,37,75,27,66,0,0,0,0,0,0


In [6]:
# Make sure the target column `FBoss` is stored as an integer.
# Only this column is cast; every other column is left unchanged.
df = df.astype({'FBoss': int})
df

Unnamed: 0,Armor,Weapon,Physical,Magic,Level,FBoss,Class_Knight,Class_Magician,Class_Sorcerer,Class_Tank,Class_Warrior
0,45,83,45,89,77,1,0,1,0,0,0
1,48,88,82,65,71,1,0,0,0,0,1
2,65,22,71,4,37,0,0,0,0,1,0
3,68,3,7,91,39,0,0,0,1,0,0
4,68,9,49,21,27,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
995,27,43,73,90,67,1,0,0,0,0,0
996,49,69,81,30,50,1,1,0,0,0,0
997,72,63,19,72,61,1,0,1,0,0,0
998,55,37,75,27,66,0,0,0,0,0,0


In [None]:
# Display the current state of the prepared frame.
df