In [1]:
#import dependencies
import pandas as pd
import sqlite3
from sklearn.preprocessing import LabelEncoder

#display all columns in dfs
pd.set_option('display.max_columns', None)

In [2]:
#connect to SQLite db
conn = sqlite3.connect('../Data-and-DBs/pokedex.db')

In [3]:
cursor = conn.cursor()
cursor.execute('DROP table IF EXISTS neural_network_results')

<sqlite3.Cursor at 0x243b863d110>

In [4]:
#call all entries from the gens_1_to_6 table in the db
training_df = pd.read_sql('SELECT * FROM gens_1_to_6', conn)

In [5]:
#encoding the training data
#F, A. (2017, March 11). convert text columns into numbers in sklearn [web log]. https://stackoverflow.com/questions/34915813/convert-text-columns-into-numbers-in-sklearn. 
le = LabelEncoder()
encoded_training = training_df[training_df.columns[:]].apply(le.fit_transform)

In [6]:
#pull necessary info from the gens_1_to_6 training df to create training data
X_train = encoded_training[['TYPE1', 'TYPE2', 'ABILITY1', 'ABILITY2', 'ABILITY_HIDDEN', 'HP', 'ATK', 'DEF',
                       'SP_ATK', 'SP_DEF', 'SPD', 'TOTAL', 'CAPTURE_RATE']]
y_train = encoded_training['LEGENDARY_FLAG']

#double-check shapes of X and y match
print(X_train.shape, y_train.shape)

(817, 13) (817,)


In [7]:
#call all entries from the gen7 and gen_8 table in the db
test_df_1 = pd.read_sql('SELECT * FROM gen_7', conn)
test_df_2 = pd.read_sql('SELECT * FROM gen_8', conn)

In [8]:
#encoding the testing data 
#F, A. (2017, March 11). convert text columns into numbers in sklearn [web log]. https://stackoverflow.com/questions/34915813/convert-text-columns-into-numbers-in-sklearn.
encoded_testing_1 = test_df_1[test_df_1.columns[:]].apply(le.fit_transform)
encoded_testing_2 = test_df_2[test_df_2.columns[:]].apply(le.fit_transform)

In [9]:
#pull necessary info from the gen_7 and gen_8 testing dfs to create test data
X_test_1 = encoded_testing_1[['TYPE1', 'TYPE2', 'ABILITY1', 'ABILITY2', 'ABILITY_HIDDEN', 'HP', 'ATK', 'DEF',
                      'SP_ATK', 'SP_DEF', 'SPD', 'TOTAL', 'CAPTURE_RATE']]                      
y_test_1 = encoded_testing_1['LEGENDARY_FLAG']

X_test_2 = encoded_testing_2[['TYPE1', 'TYPE2', 'ABILITY1', 'ABILITY2', 'ABILITY_HIDDEN', 'HP', 'ATK', 'DEF',
                      'SP_ATK', 'SP_DEF', 'SPD', 'TOTAL', 'CAPTURE_RATE']]                      
y_test_2 = encoded_testing_2['LEGENDARY_FLAG']

#double-check shapes of X and y match
print(X_test_1.shape, y_test_1.shape)
print(X_test_2.shape, y_test_2.shape)

(118, 13) (118,)
(117, 13) (117,)


In [10]:
#import dependencies to one-hot encode labels
from tensorflow.keras.utils import to_categorical

#one-hot encode labels
y_train_categorical = to_categorical(y_train)
y_test_categorical_1 = to_categorical(y_test_1)
y_test_categorical_2 = to_categorical(y_test_2)

In [11]:
#import necessary program for scaling
from sklearn.preprocessing import StandardScaler

#create scaler
X_scaler = StandardScaler().fit(X_train)

In [12]:
#scale X data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled_1 = X_scaler.transform(X_test_1)
X_test_scaled_2 = X_scaler.transform(X_test_2)

In [13]:
#import model requirement
from tensorflow.keras.models import Sequential

#create model
model = Sequential()

In [14]:
#import layer requirements
from tensorflow.keras.layers import Dense

#create input layer
number_hidden_nodes = X_train.shape[0]
model.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=X_train.shape[1]))

In [15]:
#create output layer
model.add(Dense(units=2, activation='sigmoid'))

In [16]:
#summarize model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 817)               11438     
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 1636      
Total params: 13,074
Trainable params: 13,074
Non-trainable params: 0
_________________________________________________________________


In [17]:
#compile model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [18]:
#fit model to training data
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=40,
    shuffle=True,
    verbose=2
)

Epoch 1/40
26/26 - 0s - loss: 0.4361 - accuracy: 0.8617
Epoch 2/40
26/26 - 0s - loss: 0.2258 - accuracy: 0.9082
Epoch 3/40
26/26 - 0s - loss: 0.1670 - accuracy: 0.9400
Epoch 4/40
26/26 - 0s - loss: 0.1346 - accuracy: 0.9621
Epoch 5/40
26/26 - 0s - loss: 0.1151 - accuracy: 0.9621
Epoch 6/40
26/26 - 0s - loss: 0.1006 - accuracy: 0.9718
Epoch 7/40
26/26 - 0s - loss: 0.0891 - accuracy: 0.9731
Epoch 8/40
26/26 - 0s - loss: 0.0781 - accuracy: 0.9755
Epoch 9/40
26/26 - 0s - loss: 0.0725 - accuracy: 0.9804
Epoch 10/40
26/26 - 0s - loss: 0.0651 - accuracy: 0.9816
Epoch 11/40
26/26 - 0s - loss: 0.0637 - accuracy: 0.9816
Epoch 12/40
26/26 - 0s - loss: 0.0567 - accuracy: 0.9816
Epoch 13/40
26/26 - 0s - loss: 0.0505 - accuracy: 0.9853
Epoch 14/40
26/26 - 0s - loss: 0.0487 - accuracy: 0.9865
Epoch 15/40
26/26 - 0s - loss: 0.0455 - accuracy: 0.9841
Epoch 16/40
26/26 - 0s - loss: 0.0447 - accuracy: 0.9853
Epoch 17/40
26/26 - 0s - loss: 0.0388 - accuracy: 0.9890
Epoch 18/40
26/26 - 0s - loss: 0.0361 - 

<tensorflow.python.keras.callbacks.History at 0x243c03af220>

In [19]:
#evaluate models for gen 7 and gen 8
model_loss_1, model_accuracy_1 = model.evaluate(X_test_scaled_1, y_test_categorical_1, verbose=2)
print(f"Gen 7 - Loss: {model_loss_1}, Accuracy: {model_accuracy_1}")
model_loss_2, model_accuracy_2 = model.evaluate(X_test_scaled_2, y_test_categorical_2, verbose=2)
print(f"Gen 8 - Loss: {model_loss_2}, Accuracy: {model_accuracy_2}")

4/4 - 0s - loss: 5.5125 - accuracy: 0.7373
Gen 7 - Loss: 5.512453079223633, Accuracy: 0.7372881174087524
4/4 - 0s - loss: 2.8647 - accuracy: 0.8376
Gen 8 - Loss: 2.864698886871338, Accuracy: 0.8376068472862244


In [20]:
#convert results to dataframe
neural_network_results = pd.DataFrame({
    'Loss': [model_loss_1, model_loss_2],
    'Accuracy': [model_accuracy_1, model_accuracy_2]
}, index=['Gen 7', 'Gen 8'])
neural_network_results

Unnamed: 0,Loss,Accuracy
Gen 7,5.512453,0.737288
Gen 8,2.864699,0.837607


In [21]:
#convert results to sql table
neural_network_results.to_sql('neural_network_results', conn, index=False)

In [22]:
#close database connection
conn.close()