In [None]:
# Import dependencies
import numpy as np 
import pandas as pd
import random
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from collections import defaultdict

In [None]:
# Load the Pokemon table from its CSV file and preview the first five rows
pokemon = pd.read_csv("pokemon.csv")
pokemon.head(5)

In [None]:
# Load the combat records from their CSV file and preview the first five rows
combats = pd.read_csv("combats.csv")
combats.head()

In [None]:
# Give the "#" column a descriptive name.
# Only the column is renamed. The previous `index=str` argument also converted
# every row label to a string, which made later integer lookups such as
# pokemon['Name'][61] depend on pandas' deprecated positional fallback.
pokemon = pokemon.rename(columns={"#": "Pokemon Number"})
pokemon.columns

In [None]:
# pokemon = pokemon.set_index("Pokemon Number")
# pokemon

In [None]:
# Preview the combats table. Names are not substituted yet at this point;
# that replacement happens in a later cell via names_dict.
combats.head()

In [None]:
# Report the dimensions (rows, columns) of the Pokemon and combats DataFrames
print("Dimensions of Pokemon: " + str(pokemon.shape))
print("Dimensions of Combat: " + str(combats.shape))

In [None]:
# Count the missing values in each column of the Pokemon table
pokemon.isna().sum()

In [None]:
# Print the concise summary pandas provides for the Pokemon table
pokemon.info()

In [None]:
# Count the missing values in each column of the combats table
combats.isna().sum()

In [None]:
# Print the concise summary pandas provides for the combats table
combats.info()

In [None]:
# Show every row of the Pokemon table whose Name is missing
missing_name_mask = pokemon["Name"].isna()
print(pokemon.loc[missing_name_mask])

In [None]:
# Check the names in the rows directly before and after the missing entry.
# .iloc states the positional lookup explicitly; a bare integer key on the
# Series (name_col[61]) is only positional through a deprecated fallback when
# the index is not integer-typed.
name_col = pokemon['Name']
print("The pokemon before the missing pokemon is " + name_col.iloc[61])
print('------------------')
print("The pokemon after the missing pokemon is " + name_col.iloc[63])

In [None]:
# Fill in the name of the missing Pokemon.
# The row is selected through a null mask inside a single .loc call:
#   * chained assignment (pokemon['Name'][i] = ...) may write to a temporary
#     copy and is not guaranteed to update the DataFrame;
#   * a hard-coded integer can hit the wrong row. The neighbouring-row check
#     reads positions 61 and 63, which places the gap at 62, not 63.
# NOTE(review): every null Name receives this value; this assumes exactly one
# name is missing, as the inspection cells suggest. Confirm.
pokemon.loc[pokemon["Name"].isnull(), "Name"] = "Primeape"

In [None]:
# Re-count missing values per column to confirm the name has been filled in
pokemon.isna().sum()

In [None]:
# What is the name of the missing Pokemon
# The before/after check earlier in the notebook reads rows 61 and 63, so the
# entry that was missing sits at position 62 and its predecessor at 61.
# str.format is used instead of `+` so that a value which is still missing
# (NaN) is printed rather than raising a TypeError.
name_col = pokemon["Name"]
print(" The missing pokemon was {}, The next evolution of {}".format(name_col.iloc[62], name_col.iloc[61]))

In [None]:
# Distinct "Type 1" values, listed in order of first appearance
type1_values = pokemon["Type 1"].unique().tolist()
print ("There are {} Types.".format(len(type1_values)))
type1_values

In [None]:
# Number of Pokemon per "Type 1" value (missing values are counted as well)
type1_counts = pokemon['Type 1'].value_counts(dropna=False)
print(type1_counts)

In [None]:
#poke_count = pokemon['Type 1'].value_counts(dropna =False))

In [None]:
# Bar chart: number of Pokemon per "Type 1", split by the Legendary flag
ax = sns.countplot(data=pokemon, x="Type 1", hue="Legendary")
plt.xticks(rotation=90)
ax.set_xlabel('Type 1')
ax.set_ylabel('Total ')
ax.set_title("Total Pokemon by Type 1")

In [None]:
# Bar chart: number of Pokemon per "Type 2", split by the Legendary flag
ax = sns.countplot(data=pokemon, x="Type 2", hue="Legendary")
plt.xticks(rotation=90)
ax.set_xlabel('Type 2')
ax.set_ylabel('Total ')
ax.set_title("Total Pokemon by Type 2")

In [None]:
# Count the distinct ("Type 1", "Type 2") pairs present in the dataset
type_cols = ["Type 1", "Type 2"]
unique_type_pairs = pokemon[type_cols].drop_duplicates()
print ("There are {} unique type-combinations.".format(len(unique_type_pairs)))

In [None]:
# Copy of the Pokemon table without the 'Pokemon Number' column,
# used as the input for the correlation cells
corr_data = pokemon.drop('Pokemon Number', axis=1)
corr_data
#pokemon.describe()

In [None]:
# Pairwise correlation of the numeric and boolean columns.
# corr_data still contains text columns (it is the Pokemon table minus
# 'Pokemon Number'); since pandas 2.0 DataFrame.corr() no longer drops such
# columns silently and raises instead, so they are filtered out explicitly.
corr_data.select_dtypes(include=["number", "bool"]).corr()

In [None]:
# Correlation map of the numeric and boolean columns.
# Text columns are filtered out first: since pandas 2.0 DataFrame.corr() raises
# on non-numeric data instead of silently ignoring it.
corr = corr_data.select_dtypes(include=["number", "bool"]).corr()
ax = sns.heatmap(
    corr,
    vmin=-1, vmax=1, center=0,  # full correlation range, colour scale centred on 0
    cmap=sns.diverging_palette(20, 220, n=200),
    square=True
)
# Tilt the x labels so the column names do not overlap
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=45,
    horizontalalignment='right'
);

In [None]:
# Stacked bar chart of "Type 2" frequencies within each "Type 1".
# Missing "Type 2" values are first replaced by the literal string "None"
# so they form their own crosstab column.
pokemon["Type 2"] = pokemon["Type 2"].fillna("None")
type_cross = pd.crosstab(index=pokemon["Type 1"], columns=pokemon["Type 2"])
type_cross.plot(kind="bar", stacked=True, figsize=(14,4))
plt.legend(title="Type 2", loc='upper left', bbox_to_anchor=(0.01, 0.99), ncol=5, fontsize=8)
plt.show()

In [None]:
# Count wins per Pokemon in two ways:
#   total_Wins   - Series from value_counts() on the Winner column
#   numberOfWins - DataFrame from groupby('Winner').count(), i.e. the non-null
#                  count of every other column for each winner
total_Wins = combats["Winner"].value_counts()
numberOfWins = combats.groupby('Winner').count()
print(
    total_Wins,
    "--------------------------------------------------------------------------",
    numberOfWins,
    sep="\n",
)

In [None]:
# Row counts per Pokemon for each side of a combat.
# NOTE: the variable names are crossed relative to the grouping key:
# countByFirst is grouped on Second_pokemon and countBySecond on First_pokemon.
# The names are kept because other cells refer to them.
countByFirst = combats.groupby('Second_pokemon').count()
countBySecond = combats.groupby('First_pokemon').count()
print("Looking at the dimensions of our dataframes")
print("Count by first winner shape: {}".format(countByFirst.shape))
print("Count by second winner shape: {}".format(countBySecond.shape))
print("Total Wins shape : {}".format(total_Wins.shape))

In [None]:
# Identifiers present in countByFirst's index but absent from numberOfWins'
# index, i.e. Pokemon that appear in that grouping yet never as a Winner.
never_won_ids = np.setdiff1d(countByFirst.index.values, numberOfWins.index.values)
# Shift by one to use the identifiers as row positions.
# NOTE(review): assumes Pokemon Number == row position + 1. Confirm.
find_losing_pokemon = never_won_ids - 1
# Only the first such Pokemon is looked up and printed
losing_pokemon = pokemon.iloc[find_losing_pokemon[0]]
print(losing_pokemon)

In [None]:
# Total fights per Pokemon = appearances on either side of a combat.
# Series.add(..., fill_value=0) is used instead of `+`: with `+`, a Pokemon
# present in only one of the two groupings would get NaN through index alignment.
total_fights = countByFirst.Winner.add(countBySecond.Winner, fill_value=0)

numberOfWins = numberOfWins.sort_index()
numberOfWins['Total Fights'] = total_fights
# numberOfWins comes from groupby('Winner').count(), so its First_pokemon column
# holds the number of combat rows won. The result is a fraction, not a 0-100 value.
numberOfWins['Win Percentage'] = numberOfWins.First_pokemon / numberOfWins['Total Fights']

# Merge the win statistics into the Pokemon table:
#   results2 - inner join: only Pokemon that have a row in numberOfWins
#   results3 - left join: every Pokemon, NaN where no win statistics exist
results2 = pd.merge(pokemon, numberOfWins, right_index=True, left_on="Pokemon Number")
results3 = pd.merge(pokemon, numberOfWins, left_on="Pokemon Number", right_index=True, how='left')

#missing_Pokemon = np.setdiff1d(pokemon.index.values, results3.index.values)
# Subset of results3 where the win percentage is NaN. numberOfWins only has rows
# for Pokemon that won at least once, so this lists Pokemon without a recorded
# win (whether or not they fought), not only those that never fought.
Win_Percent = results3[results3['Win Percentage'].isnull()]
Win_Percent

In [None]:
# Mean win percentage per "Type 1", highest first; show the top five
Win_Percent_Type = (
    results3.groupby('Type 1')[["Win Percentage"]]
    .mean()
    .sort_values(by="Win Percentage", ascending=False)
)
Win_Percent_Type.head()

In [None]:
# The five types with the lowest mean win percentage
Win_Percent_Type.tail()

In [None]:
# Mean number of fights per "Type 1", highest first; show the top five
Total_Fights = (
    results3.groupby('Type 1')[["Total Fights"]]
    .mean()
    .sort_values(by="Total Fights", ascending=False)
)
Total_Fights.head()

In [None]:
# The five types with the lowest mean number of fights
Total_Fights.tail()

In [None]:
# List the columns of Win_Percent (the rows of results3 whose 'Win Percentage' is null)
Win_Percent.columns

In [None]:
# Correlate the remaining stat columns with the win percentage.
# The input is results3 (every Pokemon with its win statistics). Win_Percent is
# only the subset whose 'Win Percentage' is null, so correlating it gives an
# all-NaN 'Win Percentage' row and column.
Win_Corr = results3.drop(columns=['Pokemon Number','Generation','Legendary','First_pokemon','Second_pokemon','Total Fights'])

#correlation map
# Text columns are filtered out first: since pandas 2.0 DataFrame.corr() raises
# on non-numeric data instead of silently ignoring it.
corr = Win_Corr.select_dtypes(include=["number", "bool"]).corr()
ax = sns.heatmap(
    corr,
    vmin=-1, vmax=1, center=0,  # full correlation range, colour scale centred on 0
    cmap=sns.diverging_palette(20, 220, n=200),
    square=True
)
# Tilt the x labels so the column names do not overlap
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=45,
    horizontalalignment='right');

In [None]:
# Translate the Pokemon numbers in the combats table into names.
names_dict = dict(zip(pokemon['Pokemon Number'], pokemon['Name']))
cols = ["First_pokemon","Second_pokemon","Winner"]
# Per-column map with a dict lookup instead of DataFrame.replace(dict):
# replace() with a large mapping is slow. Values without an entry in
# names_dict are left unchanged, exactly as replace() would leave them.
combats_name = combats[cols].apply(
    lambda col: col.map(lambda key: names_dict.get(key, key))
)
# NOTE: `combats` is rebound to the name-based table, so cells that group by
# Pokemon number must run before this one.
combats = combats_name
# Preview only; the full table is too large to be a useful cell output
combats.head()