In [1]:
import pandas as pd
import sqlite3
import json
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import os

In [2]:
# Read sqlite query results into a pandas DataFrame
con = sqlite3.connect("../Data-and-DBs/pokedex.db")

cursor = con.cursor()
cursor.execute('DROP table IF EXISTS svc_results')

train_df = pd.read_sql_query("SELECT * from gens_1_to_6", con)

# Verify that result of SQL query is stored in the dataframe
train_df.head()

Unnamed: 0,NUMBER,CODE,SERIAL,NAME,TYPE1,TYPE2,COLOR,ABILITY1,ABILITY2,ABILITY_HIDDEN,...,HEIGHT,WEIGHT,HP,ATK,DEF,SP_ATK,SP_DEF,SPD,TOTAL,CAPTURE_RATE
0,1,1,11,Bulbasaur,Grass,Poison,Green,Overgrow,,Chrolophyll,...,0.7,6.9,45,49,49,65,65,45,318,45
1,2,1,21,Ivysaur,Grass,Poison,Green,Overgrow,,Chrolophyll,...,1.0,13.0,60,62,63,80,80,60,405,45
2,3,1,31,Venusaur,Grass,Poison,Green,Overgrow,,Chrolophyll,...,2.0,100.0,80,82,83,100,100,80,525,45
3,3,2,32,Mega Venusaur,Grass,Poison,Green,Thick Fat,,,...,2.4,155.5,80,100,123,122,120,80,625,45
4,4,1,41,Charmander,Fire,,Red,Blaze,,Solar Power,...,0.6,8.5,39,52,43,60,50,65,309,45


In [3]:
from sklearn.preprocessing import LabelEncoder
#encoding the training data
le = LabelEncoder()
encoded_training = train_df[train_df.columns[:]].apply(le.fit_transform)

In [4]:
target = encoded_training["LEGENDARY_FLAG"]
target_names = ["Non-Legendary", "Legendary"]

In [5]:
data = encoded_training.drop(["NUMBER", "CODE", "SERIAL", "NAME", "COLOR", "GENERATION", "HEIGHT", "WEIGHT", "LEGENDARY_FLAG"], axis=1)
feature_names = data.columns
data.head()

Unnamed: 0,TYPE1,TYPE2,ABILITY1,ABILITY2,ABILITY_HIDDEN,HP,ATK,DEF,SP_ATK,SP_DEF,SPD,TOTAL,CAPTURE_RATE
0,9,14,82,53,11,17,24,23,41,37,24,57,5
1,9,14,82,53,11,31,37,37,55,50,38,94,5
2,9,14,82,53,11,51,57,55,71,66,58,164,5
3,9,14,138,53,70,51,73,86,86,77,58,187,5
4,6,12,9,53,107,11,27,18,36,22,43,51,5


In [6]:
#call all entries from the gen7 and gen_8 table in the db
test_1_df = pd.read_sql('SELECT * FROM gen_7', con)
test_2_df = pd.read_sql('SELECT * FROM gen_8', con)

In [7]:
#encoding the test 1 & 2 data 
encoded_test_1 = test_1_df[test_1_df.columns[:]].apply(le.fit_transform)
encoded_test_2 = test_2_df[test_2_df.columns[:]].apply(le.fit_transform)

In [8]:
test_1_target = encoded_test_1["LEGENDARY_FLAG"]
test_1_target_names = ["Non-Legendary", "Legendary"]

test_2_target = encoded_test_2["LEGENDARY_FLAG"]
test_2_target_names = ["Non-Legendary", "Legendary"]

In [9]:
test_1_data = encoded_test_1.drop(["NUMBER", "CODE", "SERIAL", "NAME", "COLOR", "GENERATION", "HEIGHT", "WEIGHT", "LEGENDARY_FLAG"], axis=1)
feature_names = data.columns
test_1_data.head()

Unnamed: 0,TYPE1,TYPE2,ABILITY1,ABILITY2,ABILITY_HIDDEN,HP,ATK,DEF,SP_ATK,SP_DEF,SPD,TOTAL,CAPTURE_RATE
0,1,13,20,2,33,2,12,3,2,3,26,5,19
1,1,13,20,2,33,28,21,22,6,24,29,25,12
2,3,15,54,7,14,19,28,10,29,26,46,38,7
3,10,16,49,7,26,12,24,31,0,3,12,12,19
4,10,16,49,7,26,28,33,43,2,16,24,31,9


In [10]:
test_2_data = encoded_test_2.drop(["NUMBER", "CODE", "SERIAL", "NAME", "COLOR", "GENERATION", "HEIGHT", "WEIGHT", "LEGENDARY_FLAG"], axis=1)
feature_names = data.columns
test_2_data.head()

Unnamed: 0,TYPE1,TYPE2,ABILITY1,ABILITY2,ABILITY_HIDDEN,HP,ATK,DEF,SP_ATK,SP_DEF,SPD,TOTAL,CAPTURE_RATE
0,16,10,35,26,43,9,16,9,7,4,10,11,15
1,14,10,44,15,0,9,23,9,20,17,36,25,12
2,14,3,44,15,0,17,29,15,26,23,42,42,4
3,14,10,16,14,30,26,16,13,7,4,1,17,12
4,13,13,40,14,30,29,29,20,33,20,6,40,6


In [11]:
# Support vector machine linear classifier
from sklearn.svm import SVC 
model = SVC(kernel='linear')
model.fit(data, target)

SVC(kernel='linear')

In [12]:
# Model Accuracy
print('Test Acc:', model.score(test_1_data, test_1_target))
print('Test Acc:', model.score(test_2_data, test_2_target))

Test Acc: 0.7372881355932204
Test Acc: 0.8376068376068376


In [13]:
# Calculate classification report
from sklearn.metrics import classification_report

predictions_1 = model.predict(test_1_data)
predictions_2 = model.predict(test_2_data)

Gen_7_class_report = classification_report(test_1_target, predictions_1,
                            target_names=target_names, output_dict=True, zero_division=0)
Gen_8_class_report = classification_report(test_2_target, predictions_2,
                            target_names=target_names, output_dict=True, zero_division=0)

In [14]:
print('Gen 7 - ', Gen_7_class_report)
print('Gen 8 - ', Gen_8_class_report)

Gen 7 -  {'Non-Legendary': {'precision': 0.7372881355932204, 'recall': 1.0, 'f1-score': 0.848780487804878, 'support': 87}, 'Legendary': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 31}, 'accuracy': 0.7372881355932204, 'macro avg': {'precision': 0.3686440677966102, 'recall': 0.5, 'f1-score': 0.424390243902439, 'support': 118}, 'weighted avg': {'precision': 0.5435937948865269, 'recall': 0.7372881355932204, 'f1-score': 0.6257957833815626, 'support': 118}}
Gen 8 -  {'Non-Legendary': {'precision': 0.8376068376068376, 'recall': 1.0, 'f1-score': 0.9116279069767441, 'support': 98}, 'Legendary': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19}, 'accuracy': 0.8376068376068376, 'macro avg': {'precision': 0.4188034188034188, 'recall': 0.5, 'f1-score': 0.45581395348837206, 'support': 117}, 'weighted avg': {'precision': 0.7015852144057273, 'recall': 0.8376068376068376, 'f1-score': 0.763585768236931, 'support': 117}}


In [15]:
#convert results to dataframe
gen_7_classification_df = pd.DataFrame.from_dict(Gen_7_class_report)
gen_7_classification_df.rename({'precision': 'precision_gen_7', 
                                     'recall': 'recall_gen_7', 
                                     'f1-score': 'f1-score_gen_7', 
                                     'support': 'support_gen_7'}, inplace=True)
gen_8_classification_df = pd.DataFrame.from_dict(Gen_8_class_report)
gen_8_classification_df.rename({'precision': 'precision_gen_8', 
                                     'recall': 'recall_gen_8', 
                                     'f1-score': 'f1-score_gen_8', 
                                     'support': 'support_gen_8'}, inplace=True)
svc_results = gen_7_classification_df.append(gen_8_classification_df)
svc_results.rename(columns={'macro avg': 'macro_avg', 'weighted avg': 'weighted_avg'}, inplace=True)
svc_results

Unnamed: 0,Non-Legendary,Legendary,accuracy,macro_avg,weighted_avg
precision_gen_7,0.737288,0.0,0.737288,0.368644,0.543594
recall_gen_7,1.0,0.0,0.737288,0.5,0.737288
f1-score_gen_7,0.84878,0.0,0.737288,0.42439,0.625796
support_gen_7,87.0,31.0,0.737288,118.0,118.0
precision_gen_8,0.837607,0.0,0.837607,0.418803,0.701585
recall_gen_8,1.0,0.0,0.837607,0.5,0.837607
f1-score_gen_8,0.911628,0.0,0.837607,0.455814,0.763586
support_gen_8,98.0,19.0,0.837607,117.0,117.0


In [16]:
#convert results to sql table
svc_results.to_sql('svc_results', con, index=False)

In [17]:
con.close()