In [1]:
#import dependencies
import pandas as pd
import sqlite3
from sklearn.preprocessing import LabelEncoder

#display all columns in dfs
pd.set_option('display.max_columns', None)

In [2]:
#connect to SQLite db
conn = sqlite3.connect('../Data-and-DBs/pokedex.db')

In [3]:
#call all entries from the gens_1_to_6 table in the db
training_df = pd.read_sql('SELECT * FROM gens_1_to_6', conn)

In [4]:
#encoding the training data 
le = LabelEncoder()
encoded_training = training_df[training_df.columns[:]].apply(le.fit_transform)

In [5]:
#pull necessary info from the gens_1_to_6 training df to create training data
X_train = encoded_training[['TYPE1', 'TYPE2', 'ABILITY1', 'ABILITY2', 'ABILITY_HIDDEN', 'HP', 'ATK', 'DEF',
                       'SP_ATK', 'SP_DEF', 'SPD', 'TOTAL', 'CAPTURE_RATE']]
y_train = encoded_training['LEGENDARY_FLAG']

#double-check shapes of X and y match
print(X_train.shape, y_train.shape)

(817, 13) (817,)


In [6]:
#call all entries from the gen7 and gen_8 table in the db
test_df_1 = pd.read_sql('SELECT * FROM gen_7', conn)
test_df_2 = pd.read_sql('SELECT * FROM gen_8', conn)

In [7]:
#encoding the testing data 
encoded_testing_1 = test_df_1[test_df_1.columns[:]].apply(le.fit_transform)
encoded_testing_2 = test_df_2[test_df_2.columns[:]].apply(le.fit_transform)

In [8]:
#pull necessary info from the gen_7 and gen_8 testing dfs to create test data
X_test_1 = encoded_testing_1[['TYPE1', 'TYPE2', 'ABILITY1', 'ABILITY2', 'ABILITY_HIDDEN', 'HP', 'ATK', 'DEF',
                      'SP_ATK', 'SP_DEF', 'SPD', 'TOTAL', 'CAPTURE_RATE']]                      
y_test_1 = encoded_testing_1['LEGENDARY_FLAG']

X_test_2 = encoded_testing_2[['TYPE1', 'TYPE2', 'ABILITY1', 'ABILITY2', 'ABILITY_HIDDEN', 'HP', 'ATK', 'DEF',
                      'SP_ATK', 'SP_DEF', 'SPD', 'TOTAL', 'CAPTURE_RATE']]                      
y_test_2 = encoded_testing_2['LEGENDARY_FLAG']

#double-check shapes of X and y match
print(X_test_1.shape, y_test_1.shape)
print(X_test_2.shape, y_test_2.shape)

(118, 13) (118,)
(117, 13) (117,)


In [9]:
#import random forest test
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=200)

#fit to the training data
rf = rf.fit(X_train, y_train)

In [10]:
#see the score for the test datasets
print(f'Gen 7 Score: {rf.score(X_test_1, y_test_1)}')
print(f'Gen 8 Score: {rf.score(X_test_2, y_test_2)}')

Gen 7 Score: 0.7372881355932204
Gen 8 Score: 0.8376068376068376


In [11]:
#determine the importance of each X variable
sorted(zip(rf.feature_importances_, X_train.columns), reverse=True)

[(0.4112340453274575, 'CAPTURE_RATE'),
 (0.2086719639002136, 'TOTAL'),
 (0.07891117100915014, 'SP_ATK'),
 (0.057910785799889, 'HP'),
 (0.05674401377575025, 'SPD'),
 (0.04397818666732066, 'ATK'),
 (0.04383810108391526, 'SP_DEF'),
 (0.030431994317870657, 'DEF'),
 (0.026370407761454714, 'ABILITY1'),
 (0.014662059793238374, 'TYPE1'),
 (0.014106524265167336, 'TYPE2'),
 (0.00876828681242844, 'ABILITY_HIDDEN'),
 (0.00437245948614415, 'ABILITY2')]

In [12]:
# Make predictions with the model
predictions_1 = rf.predict(X_test_1)
predictions_2 = rf.predict(X_test_2)

In [13]:
#classification report
from sklearn.metrics import classification_report
gen_7_classification = classification_report(y_test_1, predictions_1, target_names=["Non-Legendary", "Legendary"], output_dict=True, zero_division=1)
gen_8_classification = classification_report(y_test_2, predictions_2, target_names=["Non-Legendary", "Legendary"], output_dict=True, zero_division=1)

In [14]:
print(f'Gen 7 Classification: {gen_7_classification}')
print(f'Gen 8 Classification: {gen_8_classification}')

Gen 7 Classification: {'Non-Legendary': {'precision': 0.7372881355932204, 'recall': 1.0, 'f1-score': 0.848780487804878, 'support': 87}, 'Legendary': {'precision': 1.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 31}, 'accuracy': 0.7372881355932204, 'macro avg': {'precision': 0.8686440677966102, 'recall': 0.5, 'f1-score': 0.424390243902439, 'support': 118}, 'weighted avg': {'precision': 0.8063056592933066, 'recall': 0.7372881355932204, 'f1-score': 0.6257957833815626, 'support': 118}}
Gen 8 Classification: {'Non-Legendary': {'precision': 0.8376068376068376, 'recall': 1.0, 'f1-score': 0.9116279069767441, 'support': 98}, 'Legendary': {'precision': 1.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19}, 'accuracy': 0.8376068376068376, 'macro avg': {'precision': 0.9188034188034189, 'recall': 0.5, 'f1-score': 0.45581395348837206, 'support': 117}, 'weighted avg': {'precision': 0.8639783767988897, 'recall': 0.8376068376068376, 'f1-score': 0.763585768236931, 'support': 117}}


In [15]:
#close the db connection
conn.close()