In [1]:
#import dependencies
import pandas as pd
import sqlite3
from sklearn.preprocessing import LabelEncoder

#display all columns in dfs
pd.set_option('display.max_columns', None)

In [2]:
#connect to SQLite db
conn = sqlite3.connect('../Data-and-DBs/pokedex.db')

In [3]:
#call all entries from the gens_1_to_6 table in the db
training_df = pd.read_sql('SELECT * FROM gens_1_to_6', conn)

In [4]:
#encoding the training data
#F, A. (2017, March 11). convert text columns into numbers in sklearn [web log]. https://stackoverflow.com/questions/34915813/convert-text-columns-into-numbers-in-sklearn. 
le = LabelEncoder()
encoded_training = training_df[training_df.columns[:]].apply(le.fit_transform)

In [5]:
#pull necessary info from the gens_1_to_6 training df to create training data
X_train = encoded_training[['TYPE1', 'TYPE2', 'ABILITY1', 'ABILITY2', 'ABILITY_HIDDEN', 'HP', 'ATK', 'DEF',
                       'SP_ATK', 'SP_DEF', 'SPD', 'TOTAL', 'CAPTURE_RATE']]
y_train = encoded_training['LEGENDARY_FLAG']

#double-check shapes of X and y match
print(X_train.shape, y_train.shape)

(817, 13) (817,)


In [6]:
#call all entries from the gen7 and gen_8 table in the db
test_df_1 = pd.read_sql('SELECT * FROM gen_7', conn)
test_df_2 = pd.read_sql('SELECT * FROM gen_8', conn)

In [7]:
#encoding the testing data 
#F, A. (2017, March 11). convert text columns into numbers in sklearn [web log]. https://stackoverflow.com/questions/34915813/convert-text-columns-into-numbers-in-sklearn.
encoded_testing_1 = test_df_1[test_df_1.columns[:]].apply(le.fit_transform)
encoded_testing_2 = test_df_2[test_df_2.columns[:]].apply(le.fit_transform)

In [8]:
#pull necessary info from the gen_7 and gen_8 testing dfs to create test data
X_test_1 = encoded_testing_1[['TYPE1', 'TYPE2', 'ABILITY1', 'ABILITY2', 'ABILITY_HIDDEN', 'HP', 'ATK', 'DEF',
                      'SP_ATK', 'SP_DEF', 'SPD', 'TOTAL', 'CAPTURE_RATE']]                      
y_test_1 = encoded_testing_1['LEGENDARY_FLAG']

X_test_2 = encoded_testing_2[['TYPE1', 'TYPE2', 'ABILITY1', 'ABILITY2', 'ABILITY_HIDDEN', 'HP', 'ATK', 'DEF',
                      'SP_ATK', 'SP_DEF', 'SPD', 'TOTAL', 'CAPTURE_RATE']]                      
y_test_2 = encoded_testing_2['LEGENDARY_FLAG']

#double-check shapes of X and y match
print(X_test_1.shape, y_test_1.shape)
print(X_test_2.shape, y_test_2.shape)

(118, 13) (118,)
(117, 13) (117,)


In [9]:
#import dependencies to one-hot encode labels
from tensorflow.keras.utils import to_categorical

#one-hot encode labels
y_train_categorical = to_categorical(y_train)
y_test_categorical_1 = to_categorical(y_test_1)
y_test_categorical_2 = to_categorical(y_test_2)

In [10]:
#import necessary program for scaling
from sklearn.preprocessing import StandardScaler

#create scaler
X_scaler = StandardScaler().fit(X_train)

In [11]:
#scale X data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled_1 = X_scaler.transform(X_test_1)
X_test_scaled_2 = X_scaler.transform(X_test_2)

In [12]:
#import model requirement
from tensorflow.keras.models import Sequential

#create model
model = Sequential()

In [13]:
#import layer requirements
from tensorflow.keras.layers import Dense

#create input layer
number_hidden_nodes = X_train.shape[0]
model.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=X_train.shape[1]))

In [14]:
#create output layer
model.add(Dense(units=2, activation='sigmoid'))

In [15]:
#summarize model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 817)               11438     
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 1636      
Total params: 13,074
Trainable params: 13,074
Non-trainable params: 0
_________________________________________________________________


In [16]:
#compile model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [17]:
#fit model to training data
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=40,
    shuffle=True,
    verbose=2
)

Epoch 1/40
26/26 - 0s - loss: 0.4338 - accuracy: 0.8764
Epoch 2/40
26/26 - 0s - loss: 0.2227 - accuracy: 0.9058
Epoch 3/40
26/26 - 0s - loss: 0.1648 - accuracy: 0.9437
Epoch 4/40
26/26 - 0s - loss: 0.1349 - accuracy: 0.9670
Epoch 5/40
26/26 - 0s - loss: 0.1135 - accuracy: 0.9657
Epoch 6/40
26/26 - 0s - loss: 0.0998 - accuracy: 0.9682
Epoch 7/40
26/26 - 0s - loss: 0.0879 - accuracy: 0.9718
Epoch 8/40
26/26 - 0s - loss: 0.0802 - accuracy: 0.9780
Epoch 9/40
26/26 - 0s - loss: 0.0723 - accuracy: 0.9792
Epoch 10/40
26/26 - 0s - loss: 0.0658 - accuracy: 0.9792
Epoch 11/40
26/26 - 0s - loss: 0.0614 - accuracy: 0.9841
Epoch 12/40
26/26 - 0s - loss: 0.0577 - accuracy: 0.9804
Epoch 13/40
26/26 - 0s - loss: 0.0534 - accuracy: 0.9841
Epoch 14/40
26/26 - 0s - loss: 0.0476 - accuracy: 0.9829
Epoch 15/40
26/26 - 0s - loss: 0.0476 - accuracy: 0.9865
Epoch 16/40
26/26 - 0s - loss: 0.0445 - accuracy: 0.9841
Epoch 17/40
26/26 - 0s - loss: 0.0419 - accuracy: 0.9853
Epoch 18/40
26/26 - 0s - loss: 0.0370 - 

<tensorflow.python.keras.callbacks.History at 0x1e62db2ce50>

In [18]:
#evaluate models for gen 7 and gen 8
model_loss_1, model_accuracy_1 = model.evaluate(X_test_scaled_1, y_test_categorical_1, verbose=2)
print(f"Gen 7 - Loss: {model_loss_1}, Accuracy: {model_accuracy_1}")
model_loss_2, model_accuracy_2 = model.evaluate(X_test_scaled_2, y_test_categorical_2, verbose=2)
print(f"Gen 8 - Loss: {model_loss_2}, Accuracy: {model_accuracy_2}")

4/4 - 0s - loss: 5.3761 - accuracy: 0.7373
Gen 7 - Loss: 5.3760986328125, Accuracy: 0.7372881174087524
4/4 - 0s - loss: 2.8146 - accuracy: 0.8376
Gen 8 - Loss: 2.814566135406494, Accuracy: 0.8376068472862244


In [19]:
#close database connection
conn.close()