In [None]:
import importlib

import MyPyTable
importlib.reload(MyPyTable)
from MyPyTable import MyPyTable

import utils
importlib.reload(utils)

# Load Data from CSV

In [50]:
titanic_data = MyPyTable()
titanic_data.load_from_file("input_data/updated_titanic.csv")
# titanic_data.pretty_print()

<MyPyTable.MyPyTable at 0x175bd0350>

# Separate Embarked Classes

In [51]:
Q_titanic_data = []
C_titanic_data = []
S_titanic_data = []

# look at how data is imported
print(titanic_data.data[0])

for row in titanic_data.data:
    if row[0] == 1:
        Q_titanic_data.append(row)
    elif row[1] == 1:
        C_titanic_data.append(row)
    elif row[3] == 1:
        S_titanic_data.append(row)

# print("Q dataset: ", Q_titanic_data)
# print("C dataset: ", C_titanic_data)
# print("S dataset: ", S_titanic_data)

[0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 38.0, 1.0, 0.0, 71.2833]


# Entropy calculation  
### Shannon Entropy Calculation 
 
$H(X) = - \sum p(x) \log(p(x))$

In [52]:
Q_values = [row[0] for row in titanic_data.data]
C_values = [row[1] for row in titanic_data.data]
S_values = [row[2] for row in titanic_data.data]

Q_entropy = utils.calculate_shannon_entropy(Q_values)
C_entropy = utils.calculate_shannon_entropy(C_values)
S_entropy = utils.calculate_shannon_entropy(S_values)

print("Q entropy: ", Q_entropy)
print("C entropy: ", C_entropy)
print("S entropy: ", S_entropy)

Q entropy:  0.4247354052464511
C entropy:  0.6988787658095251
S entropy:  0.8520607567826528


### Renyi Entropy Calculation
$H_\alpha(X) = \frac{1}{1 - \alpha} \log_2 \left( \sum_{i=1}^{n} p(x_i)^\alpha \right)$

In [53]:
Q_renyi_entropy = utils.calculate_renyi_entropy(Q_values)
C_renyi_entropy = utils.calculate_renyi_entropy(C_values)
S_renyi_entropy = utils.calculate_renyi_entropy(S_values)

print("Q Renyi entropy: ", Q_renyi_entropy)
print("C Renyi entropy: ", C_renyi_entropy)
print("S Renyi entropy: ", S_renyi_entropy)

Q Renyi entropy:  0.2482164547619598
C Renyi entropy:  0.5275420924996362
S Renyi entropy:  0.7394021947104253


# Dataset Entropies for Features for Both Entropies  
Shannon measures prediactablitly -> lower entropy means higher predictability    
Renyi has a sharper punishment of lower predictablity events 

Ideal feature? Intra vs inter...

In [54]:
survived = [row[3] for row in titanic_data.data]
p_class = [row[4] for row in titanic_data.data]
sex = [row[5] for row in titanic_data.data]
age = [row[6] for row in titanic_data.data]
sib_sp = [row[7] for row in titanic_data.data]
par_ch = [row[8] for row in titanic_data.data]

# Shannon entropy
survived_shannon = utils.calculate_shannon_entropy(survived)
p_class_shannon = utils.calculate_shannon_entropy(p_class)
sex_shannon = utils.calculate_shannon_entropy(sex)
age_shannon = utils.calculate_shannon_entropy(age)
sib_sp_shannon = utils.calculate_shannon_entropy(sib_sp)
par_ch_shannon = utils.calculate_shannon_entropy(par_ch)

print("Survived shannon: ", survived_shannon)
print("Pclass shannon: ", p_class_shannon)
print("Sex shannon: ", sex_shannon)
print("Age shannon: ", age_shannon)
print("Sibsp shannon: ", sib_sp_shannon)
print("Parch shannon: ", par_ch_shannon)


Survived shannon:  0.9610018162432594
Pclass shannon:  1.4399719859739184
Sex shannon:  0.9365517326147232
Age shannon:  5.291844098413824
Sibsp shannon:  1.3377097254948143
Parch shannon:  1.129162033353948


In [55]:
# Renyi Entropy
survived_renyi = utils.calculate_renyi_entropy(survived)
p_class_renyi = utils.calculate_renyi_entropy(p_class)
sex_renyi = utils.calculate_renyi_entropy(sex)
age_renyi = utils.calculate_renyi_entropy(age)
sib_sp_renyi = utils.calculate_renyi_entropy(sib_sp)
par_ch_renyi = utils.calculate_renyi_entropy(par_ch)

print("Survived renyi: ", survived_renyi)
print("Pclass renyi: ", p_class_renyi)
print("Sex renyi: ", sex_renyi)
print("Age renyi: ", age_renyi)
print("Sibsp renyi: ", sib_sp_renyi)
print("Parch renyi: ", par_ch_renyi)

Survived renyi:  0.9247082917316864
Pclass renyi:  1.3048541529744426
Sex renyi:  0.880098358054565
Age renyi:  3.998783847810187
Sibsp renyi:  0.9347769469386966
Parch renyi:  0.7265004969325213


# Embarked Subset Data  
Calculate the entropy for each feature that has value (not classifier or fare).

In [63]:
# Q Embarked Dataset
Q_survived = [row[3] for row in Q_titanic_data]
Q_p_class = [row[4] for row in Q_titanic_data]
Q_sex = [row[5] for row in Q_titanic_data]
Q_age = [row[6] for row in Q_titanic_data]
Q_sib_sp = [row[7] for row in Q_titanic_data]
Q_par_ch = [row[8] for row in Q_titanic_data]

# Shannon entropy
Q_survived_shannon = utils.calculate_shannon_entropy(Q_survived)
Q_p_class_shannon = utils.calculate_shannon_entropy(Q_p_class)
Q_sex_shannon = utils.calculate_shannon_entropy(Q_sex)
Q_age_shannon = utils.calculate_shannon_entropy(Q_age)
Q_sib_sp_shannon = utils.calculate_shannon_entropy(Q_sib_sp)
Q_par_ch_shannon = utils.calculate_shannon_entropy(Q_par_ch)

print("Q Embarked")
print("Survived shannon: ", Q_survived_shannon)
print("Pclass shannon: ", Q_p_class_shannon)
print("Sex shannon: ", Q_sex_shannon)
print("Age shannon: ", Q_age_shannon)
print("Sibsp shannon: ", Q_sib_sp_shannon)
print("Parch shannon: ", Q_par_ch_shannon)
print()

# Renyi Entropy
Q_survived_renyi = utils.calculate_renyi_entropy(Q_survived)
Q_p_class_renyi = utils.calculate_renyi_entropy(Q_p_class)
Q_sex_renyi = utils.calculate_renyi_entropy(Q_sex)
Q_age_renyi = utils.calculate_renyi_entropy(Q_age)
Q_sib_sp_renyi = utils.calculate_renyi_entropy(Q_sib_sp)
Q_par_ch_renyi = utils.calculate_renyi_entropy(Q_par_ch)

print("Survived renyi: ", Q_survived_renyi)
print("Pclass renyi: ", Q_p_class_renyi)
print("Sex renyi: ", Q_sex_renyi)
print("Age renyi: ", Q_age_renyi)
print("Sibsp renyi: ", Q_sib_sp_renyi)
print("Parch renyi: ", Q_par_ch_renyi)

Q Embarked
Survived shannon:  0.9645476589143231
Pclass shannon:  0.40978020814931754
Sex shannon:  0.9969562518473083
Age shannon:  2.498048755126898
Sibsp shannon:  1.0994543949868185
Parch shannon:  0.5914883261232053

Survived renyi:  0.9313381800487386
Pclass renyi:  0.19010973772190023
Sex renyi:  0.9939295751517668
Age renyi:  1.2258030494991
Sibsp renyi:  0.7090120514195228
Parch renyi:  0.30505498367360884


In [64]:
# C Embarked Dataset
C_survived = [row[3] for row in C_titanic_data]
C_p_class = [row[4] for row in C_titanic_data]
C_sex = [row[5] for row in C_titanic_data]
C_age = [row[6] for row in C_titanic_data]
C_sib_sp = [row[7] for row in C_titanic_data]
C_par_ch = [row[8] for row in C_titanic_data]

# Shannon entropy
C_survived_shannon = utils.calculate_shannon_entropy(C_survived)
C_p_class_shannon = utils.calculate_shannon_entropy(C_p_class)
C_sex_shannon = utils.calculate_shannon_entropy(C_sex)
C_age_shannon = utils.calculate_shannon_entropy(C_age)
C_sib_sp_shannon = utils.calculate_shannon_entropy(C_sib_sp)
C_par_ch_shannon = utils.calculate_shannon_entropy(C_par_ch)

print("C Embarked")
print("Survived shannon: ", C_survived_shannon)
print("Pclass shannon: ", C_p_class_shannon)
print("Sex shannon: ", C_sex_shannon)
print("Age shannon: ", C_age_shannon)
print("Sibsp shannon: ", C_sib_sp_shannon)
print("Parch shannon: ", C_par_ch_shannon)
print()

# Renyi Entropy
C_survived_renyi = utils.calculate_renyi_entropy(C_survived)
C_p_class_renyi = utils.calculate_renyi_entropy(C_p_class)
C_sex_renyi = utils.calculate_renyi_entropy(C_sex)
C_age_renyi = utils.calculate_renyi_entropy(C_age)
C_sib_sp_renyi = utils.calculate_renyi_entropy(C_sib_sp)
C_par_ch_renyi = utils.calculate_renyi_entropy(C_par_ch)

print("Survived renyi: ", C_survived_renyi)
print("Pclass renyi: ", C_p_class_renyi)
print("Sex renyi: ", C_sex_renyi)
print("Age renyi: ", C_age_renyi)
print("Sibsp renyi: ", C_sib_sp_renyi)
print("Parch renyi: ", C_par_ch_renyi)

C Embarked
Survived shannon:  0.9917033083725818
Pclass shannon:  1.3612751028346035
Sex shannon:  0.9875943537925622
Age shannon:  4.995226904564466
Sibsp shannon:  1.1017123602974421
Parch shannon:  1.115894976744305

Survived renyi:  0.9835327884112297
Pclass renyi:  1.2496025310282517
Sex renyi:  0.9754696401856398
Age renyi:  3.753967709310579
Sibsp renyi:  0.9385568593849218
Parch renyi:  0.7986150724934451


In [67]:
# S Embarked Dataset
S_survived = [row[3] for row in S_titanic_data]
S_p_class = [row[4] for row in S_titanic_data]
S_sex = [row[5] for row in S_titanic_data]
S_age = [row[6] for row in S_titanic_data]
S_sib_sp = [row[7] for row in S_titanic_data]
S_par_ch = [row[8] for row in S_titanic_data]

# Shannon entropy
S_survived_shannon = utils.calculate_shannon_entropy(S_survived)
S_p_class_shannon = utils.calculate_shannon_entropy(S_p_class)
S_sex_shannon = utils.calculate_shannon_entropy(S_sex)
S_age_shannon = utils.calculate_shannon_entropy(S_age)
S_sib_sp_shannon = utils.calculate_shannon_entropy(S_sib_sp)
S_par_ch_shannon = utils.calculate_shannon_entropy(S_par_ch)

print("S Embarked")
print("Survived shannon: ", S_survived_shannon)
print("Pclass shannon: ", S_p_class_shannon)
print("Sex shannon: ", S_sex_shannon)
print("Age shannon: ", S_age_shannon)
print("Sibsp shannon: ", S_sib_sp_shannon)
print("Parch shannon: ", S_par_ch_shannon)
print()

# Renyi Entropy
S_survived_renyi = utils.calculate_renyi_entropy(S_survived)
S_p_class_renyi = utils.calculate_renyi_entropy(S_p_class)
S_sex_renyi = utils.calculate_renyi_entropy(S_sex)
S_age_renyi = utils.calculate_renyi_entropy(S_age)
S_sib_sp_renyi = utils.calculate_renyi_entropy(S_sib_sp)
S_par_ch_renyi = utils.calculate_renyi_entropy(S_par_ch)

print("Survived renyi: ", S_survived_renyi)
print("Pclass renyi: ", S_p_class_renyi)
print("Sex renyi: ", S_sex_renyi)
print("Age renyi: ", S_age_renyi)
print("Sibsp renyi: ", S_sib_sp_renyi)
print("Parch renyi: ", S_par_ch_renyi)

# print(S_titanic_data)

S Embarked
Survived shannon:  0.0
Pclass shannon:  1.5824904486235674
Sex shannon:  0.9354872668926244
Age shannon:  5.441790470190361
Sibsp shannon:  1.2895504678514702
Parch shannon:  1.3757450951300618

Survived renyi:  -0.0
Pclass renyi:  1.5800976570825727
Sex renyi:  0.878198914458595
Age renyi:  5.032889169892737
Sibsp renyi:  1.039860960124198
Parch renyi:  1.0841990321133155
