In [1]:
#import dependencies
import pandas as pd
import sqlite3
from sklearn.preprocessing import LabelEncoder

#display all columns in dfs
pd.set_option('display.max_columns', None)

In [2]:
#connect to SQLite db
conn = sqlite3.connect('../Data-and-DBs/pokedex.db')

In [3]:
cursor = conn.cursor()
cursor.execute('DROP table IF EXISTS random_forest_results')

<sqlite3.Cursor at 0x214c941f340>

In [4]:
#call all entries from the gens_1_to_6 table in the db
training_df = pd.read_sql('SELECT * FROM gens_1_to_6', conn)

In [5]:
#encoding the training data 
le = LabelEncoder()
encoded_training = training_df[training_df.columns[:]].apply(le.fit_transform)

In [6]:
#pull necessary info from the gens_1_to_6 training df to create training data
X_train = encoded_training[['TYPE1', 'TYPE2', 'ABILITY1', 'ABILITY2', 'ABILITY_HIDDEN', 'HP', 'ATK', 'DEF',
                       'SP_ATK', 'SP_DEF', 'SPD', 'TOTAL', 'CAPTURE_RATE']]
y_train = encoded_training['LEGENDARY_FLAG']

#double-check shapes of X and y match
print(X_train.shape, y_train.shape)

(817, 13) (817,)


In [7]:
#call all entries from the gen7 and gen_8 table in the db
test_df_1 = pd.read_sql('SELECT * FROM gen_7', conn)
test_df_2 = pd.read_sql('SELECT * FROM gen_8', conn)

In [8]:
#encoding the testing data 
encoded_testing_1 = test_df_1[test_df_1.columns[:]].apply(le.fit_transform)
encoded_testing_2 = test_df_2[test_df_2.columns[:]].apply(le.fit_transform)

In [9]:
#pull necessary info from the gen_7 and gen_8 testing dfs to create test data
X_test_1 = encoded_testing_1[['TYPE1', 'TYPE2', 'ABILITY1', 'ABILITY2', 'ABILITY_HIDDEN', 'HP', 'ATK', 'DEF',
                      'SP_ATK', 'SP_DEF', 'SPD', 'TOTAL', 'CAPTURE_RATE']]                      
y_test_1 = encoded_testing_1['LEGENDARY_FLAG']

X_test_2 = encoded_testing_2[['TYPE1', 'TYPE2', 'ABILITY1', 'ABILITY2', 'ABILITY_HIDDEN', 'HP', 'ATK', 'DEF',
                      'SP_ATK', 'SP_DEF', 'SPD', 'TOTAL', 'CAPTURE_RATE']]                      
y_test_2 = encoded_testing_2['LEGENDARY_FLAG']

#double-check shapes of X and y match
print(X_test_1.shape, y_test_1.shape)
print(X_test_2.shape, y_test_2.shape)

(118, 13) (118,)
(117, 13) (117,)


In [10]:
#import random forest test
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=200)

#fit to the training data
rf = rf.fit(X_train, y_train)

In [11]:
#see the score for the test datasets
gen_7_score = rf.score(X_test_1, y_test_1)
gen_8_score = rf.score(X_test_2, y_test_2)

print(f'Gen 7 Score: {gen_7_score}')
print(f'Gen 8 Score: {gen_8_score}')

Gen 7 Score: 0.7372881355932204
Gen 8 Score: 0.8376068376068376


In [12]:
#determine the importance of each X variable
sorted(zip(rf.feature_importances_, X_train.columns), reverse=True)

[(0.38405625736219207, 'CAPTURE_RATE'),
 (0.21952156326502675, 'TOTAL'),
 (0.09507359208769714, 'SP_ATK'),
 (0.06236626032553068, 'SPD'),
 (0.058345262330768366, 'HP'),
 (0.04614785078369108, 'SP_DEF'),
 (0.0371709525977252, 'ATK'),
 (0.029959862299982532, 'DEF'),
 (0.025464864304070894, 'ABILITY1'),
 (0.014267155606231392, 'TYPE1'),
 (0.013842028572919354, 'TYPE2'),
 (0.009695150170483427, 'ABILITY_HIDDEN'),
 (0.004089200293681243, 'ABILITY2')]

In [13]:
# Make predictions with the model
predictions_1 = rf.predict(X_test_1)
predictions_2 = rf.predict(X_test_2)

In [14]:
#calculate r2
from sklearn.metrics import r2_score

gen_7_r2 = r2_score(y_test_1, predictions_1)
gen_8_r2 = r2_score(y_test_2, predictions_2)

print('Gen 7 r2 ', gen_7_r2)
print('Gen 8 r2 ', gen_8_r2)

Gen 7 r2  -0.3563218390804599
Gen 8 r2  -0.19387755102040827


In [15]:
random_forest_results = pd.DataFrame({'Generation': ['Gen 7', 'Gen 8'],
                           'Accuracy': [gen_7_score, gen_8_score],
                           'R2': [gen_7_r2, gen_8_r2]})
random_forest_results

Unnamed: 0,Generation,Accuracy,R2
0,Gen 7,0.737288,-0.356322
1,Gen 8,0.837607,-0.193878


In [16]:
#convert results to sql table
random_forest_results.to_sql('random_forest_results', conn, index=False)

In [17]:
#close the db connection
conn.close()