In [1]:
# mlp for binary classification
import os

from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense



In [2]:
# load the dataset
# The CSV location can be overridden with the GAME_STATE_CSV environment
# variable so the notebook also runs on machines other than the author's.
# When the variable is unset (or empty) the original absolute path is used,
# so existing behaviour is unchanged.
path = os.environ.get("GAME_STATE_CSV") or (
    "/Users/viethnguyen/Documents/pet_projects/Pokemon-Showdown-AI/model_data/game_state/20231209_game_state.csv"
)
df = read_csv(path)

In [3]:
# list every column with its dtype; verbose=True asks pandas for the full
# per-column listing rather than a shortened summary
df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48259 entries, 0 to 48258
Data columns (total 215 columns):
 #    Column                   Dtype
---   ------                   -----
 0    p1_win                   int64
 1    T_ELECTRIC               int64
 2    T_GRASSY                 int64
 3    T_MISTY                  int64
 4    T_PSYCHIC                int64
 5    W_SUN                    int64
 6    W_RAIN                   int64
 7    W_SAND                   int64
 8    W_SNOW                   int64
 9    W_EX_SUN                 int64
 10   W_EX_RAIN                int64
 11   W_EX_WIND                int64
 12   R_TRICK_ROOM             int64
 13   R_WONDER_ROOM            int64
 14   R_MAGIC_ROOM             int64
 15   p1_reflect               int64
 16   p1_light_screen          int64
 17   p1_tailwind              int64
 18   p1_webbed                int64
 19   p1_has_bug               int64
 20   p1_has_dark              int64
 21   p1_has_dragon            int64
 2

In [4]:
# separate the target column from the predictors; `df` itself is left intact
labels = df['p1_win'].copy()
features = df.drop(columns='p1_win')

In [5]:
# cast both the predictors and the target to 32-bit floats
features, labels = (part.astype('float32') for part in (features, labels))

In [6]:
# hold out 20% of the rows for testing; the fixed random_state keeps the
# shuffle repeatable between runs
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=12,
)
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(38607, 214) (9652, 214) (38607,) (9652,)


In [7]:
# Carve the last 200 training rows (by position) off as a validation set.
# NOTE: this cell rebinds X_train / y_train, so running it a second time
# removes another 200 rows from the training data.
x_val, X_train = X_train.iloc[-200:], X_train.iloc[:-200]
y_val, y_train = y_train.iloc[-200:], y_train.iloc[:-200]

In [8]:
# width of the network input = number of predictor columns
n_features = len(X_train.columns)

In [9]:
# estimate a hidden-layer width as rows / (2 * (columns + 1))
# NOTE(review): presumably the N_s / (alpha * (N_in + N_out)) rule of thumb
# with alpha = 2 and a single output unit -- confirm with the author
n_hidden = len(X_train) / (2 * (len(X_train.columns) + 1))
n_hidden

89.31860465116279

In [10]:
# define model: one ReLU hidden layer feeding a single sigmoid output unit
# NOTE(review): the width of 90 looks like the n_hidden estimate rounded up --
# confirm before changing either value
model = Sequential([
    Dense(
        90,
        activation='relu',
        kernel_initializer='he_normal',
        input_shape=(n_features,),
    ),
    Dense(1, activation='sigmoid'),
])

In [11]:
# configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported alongside the loss
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [12]:
# train for 150 epochs in mini-batches of 32 with console output silenced;
# the 200-row validation set is scored after every epoch
history = model.fit(
    X_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=32,
    epochs=150,
    verbose=0,
)

In [13]:
# record of the loss values and metric values during training
# (a dict keyed by metric name, e.g. 'loss', holding one value per epoch;
# displaying it in full prints every epoch of every tracked metric)
history.history

{'loss': [0.8463659882545471,
  0.6127411127090454,
  0.5886706113815308,
  0.5687779784202576,
  0.5485730171203613,
  0.5405794978141785,
  0.524630069732666,
  0.5106863379478455,
  0.5078663229942322,
  0.49297186732292175,
  0.4888758659362793,
  0.47535213828086853,
  0.46634921431541443,
  0.45545971393585205,
  0.4498007595539093,
  0.4408493638038635,
  0.43753835558891296,
  0.4293997287750244,
  0.4249267578125,
  0.4192834794521332,
  0.41227203607559204,
  0.40476125478744507,
  0.40280836820602417,
  0.39868003129959106,
  0.3926753103733063,
  0.3875420391559601,
  0.38346487283706665,
  0.37855905294418335,
  0.3729032576084137,
  0.37224188446998596,
  0.36988043785095215,
  0.3630596697330475,
  0.3598427474498749,
  0.3546103537082672,
  0.3499198257923126,
  0.3518040180206299,
  0.34636303782463074,
  0.3441806137561798,
  0.34118184447288513,
  0.33607134222984314,
  0.33587485551834106,
  0.33217132091522217,
  0.3294003903865814,
  0.32796064019203186,
  0.32355

In [14]:
# score the trained model on the held-out test rows and report accuracy
loss, acc = model.evaluate(
    x=X_test,
    y=y_test,
    verbose=0,
)
print('Test Accuracy: %.3f' % (acc,))

Test Accuracy: 0.833


In [15]:
# make a prediction for one hand-written game state
row = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,43,0,0,0,0,0,0,0,0,0,-1,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,84,0,0,0,0,0,0,0,0,0,0,0,0,91,0,0,0,0,0,0,0,1,1,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0]
# fail early, with a readable message, if the hand-written vector no longer
# matches the number of columns the model was built for
assert len(row) == n_features, "expected %d features, got %d" % (n_features, len(row))
yhat = model.predict([row])
# predict() returns a 2-D array (one row per sample, one column for the single
# sigmoid unit). Pick the scalar out explicitly: formatting the whole array
# with %f relies on NumPy's deprecated array-to-scalar conversion and emits a
# DeprecationWarning.
print('Predicted: %.3f' % yhat[0][0])

Predicted: 0.110


  print('Predicted: %.3f' % yhat)


In [26]:
# Score the held-out test set with `evaluate`
print("Evaluate on test data")
results = model.evaluate(
    X_test,
    y_test,
    batch_size=32,
)
print("test loss, test acc:", results)

# Ask `predict` for probabilities (the sigmoid output of the final layer)
# for the first three test rows, selected by position
print("Generate predictions for 3 samples")
predictions = model.predict(X_test.iloc[:3])
print("predictions shape:", predictions.shape)
predictions.round(3)

Evaluate on test data
test loss, test acc: [0.4066351354122162, 0.8330916166305542]
Generate predictions for 3 samples
predictions shape: (3, 1)


array([[0.093],
       [0.918],
       [0.   ]], dtype=float32)