In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Extract the data
df = pd.read_csv("GOT_data/battles.csv")

df
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38 entries, 0 to 37
Data columns (total 25 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   name                38 non-null     object 
 1   year                38 non-null     int64  
 2   battle_number       38 non-null     int64  
 3   attacker_king       36 non-null     object 
 4   defender_king       35 non-null     object 
 5   attacker_1          38 non-null     object 
 6   attacker_2          10 non-null     object 
 7   attacker_3          3 non-null      object 
 8   attacker_4          2 non-null      object 
 9   defender_1          37 non-null     object 
 10  defender_2          2 non-null      object 
 11  defender_3          0 non-null      float64
 12  defender_4          0 non-null      float64
 13  attacker_outcome    37 non-null     object 
 14  battle_type         37 non-null     object 
 15  major_death         37 non-null     float64
 16  major_capt

In [3]:
# Transform the data
# Create a new dataset with the selected columns required for the data analysis
battles = df[['name', 'year', 'attacker_king', 'defender_king', 'attacker_outcome', 'attacker_size', 'defender_size']]
battles

Unnamed: 0,name,year,attacker_king,defender_king,attacker_outcome,attacker_size,defender_size
0,Battle of the Golden Tooth,298,Joffrey/Tommen Baratheon,Robb Stark,win,15000.0,4000.0
1,Battle at the Mummer's Ford,298,Joffrey/Tommen Baratheon,Robb Stark,win,,120.0
2,Battle of Riverrun,298,Joffrey/Tommen Baratheon,Robb Stark,win,15000.0,10000.0
3,Battle of the Green Fork,298,Robb Stark,Joffrey/Tommen Baratheon,loss,18000.0,20000.0
4,Battle of the Whispering Wood,298,Robb Stark,Joffrey/Tommen Baratheon,win,1875.0,6000.0
5,Battle of the Camps,298,Robb Stark,Joffrey/Tommen Baratheon,win,6000.0,12625.0
6,Sack of Darry,298,Joffrey/Tommen Baratheon,Robb Stark,win,,
7,Battle of Moat Cailin,299,Balon/Euron Greyjoy,Robb Stark,win,,
8,Battle of Deepwood Motte,299,Balon/Euron Greyjoy,Robb Stark,win,1000.0,
9,Battle of the Stony Shore,299,Balon/Euron Greyjoy,Robb Stark,win,264.0,


In [4]:
# Checking null values
battles.isnull().sum()

name                 0
year                 0
attacker_king        2
defender_king        3
attacker_outcome     1
attacker_size       14
defender_size       19
dtype: int64

In [5]:
# Replace all null values
battles = battles.copy()
battles.fillna({'attacker_king': 'No King'}, inplace=True)
battles.fillna({'defender_king':'No King'},inplace=True)
battles.fillna({'attacker_outcome':'loss'},inplace=True)

battles.isnull().sum()

name                 0
year                 0
attacker_king        0
defender_king        0
attacker_outcome     0
attacker_size       14
defender_size       19
dtype: int64

In [6]:
# Replace the null values to the average (attacker_size)
attacker_size_sum=battles.attacker_size.sum()
average=attacker_size_sum/38
average

6279.5

In [7]:
battles.fillna({'attacker_size':6280},inplace=True)

In [8]:
# Replace the null values to the average (defender_size)
defender_size_sum=battles.defender_size.sum()
average=defender_size_sum/38
average

3214.0789473684213

In [9]:
battles.fillna({'defender_size':3214},inplace=True)

In [10]:
# Change the datatype for analyzing
battles['attacker_size'] = battles['attacker_size'].apply(int)
battles['defender_size'] = battles['defender_size'].apply(int)
battles.dtypes

name                object
year                 int64
attacker_king       object
defender_king       object
attacker_outcome    object
attacker_size        int64
defender_size        int64
dtype: object

In [11]:
# Load the data
battles.to_csv("GOT_data/battles_cleaned.csv")

In [13]:
battles = pd.read_csv("GOT_data/battles_cleaned.csv")
battles

Unnamed: 0.1,Unnamed: 0,name,year,attacker_king,defender_king,attacker_outcome,attacker_size,defender_size
0,0,Battle of the Golden Tooth,298,Joffrey/Tommen Baratheon,Robb Stark,win,15000,4000
1,1,Battle at the Mummer's Ford,298,Joffrey/Tommen Baratheon,Robb Stark,win,6280,120
2,2,Battle of Riverrun,298,Joffrey/Tommen Baratheon,Robb Stark,win,15000,10000
3,3,Battle of the Green Fork,298,Robb Stark,Joffrey/Tommen Baratheon,loss,18000,20000
4,4,Battle of the Whispering Wood,298,Robb Stark,Joffrey/Tommen Baratheon,win,1875,6000
5,5,Battle of the Camps,298,Robb Stark,Joffrey/Tommen Baratheon,win,6000,12625
6,6,Sack of Darry,298,Joffrey/Tommen Baratheon,Robb Stark,win,6280,3214
7,7,Battle of Moat Cailin,299,Balon/Euron Greyjoy,Robb Stark,win,6280,3214
8,8,Battle of Deepwood Motte,299,Balon/Euron Greyjoy,Robb Stark,win,1000,3214
9,9,Battle of the Stony Shore,299,Balon/Euron Greyjoy,Robb Stark,win,264,3214
