In [None]:
#kerasをインポートする
from keras import backend as K

In [None]:
#pandasとnumpyをインポートする
import pandas as pd
import numpy as np

In [None]:
#トレーニングデータとテストデータを読み込む
train = pd.read_csv("./train.csv")
test = pd.read_csv("./test.csv")

In [None]:
#トレーニングデータの先頭を表示
train.head()

In [None]:
#教師データを抽出
Y_train = train["Survived"]
Y_train.head()

In [None]:
#テストデータの先頭を表示
test.head()

In [None]:
#テストデータとトレーニングデータshapeを表示
test_shape = test.shape
train_shape = train.shape

print(test_shape)
print(train_shape)

In [None]:
# Fill missing values: Age gets the column median (not the mean), Embarked gets "S"
train["Age"] = train["Age"].fillna(train["Age"].median())
train["Embarked"] = train["Embarked"].fillna("S")

test["Age"] = test["Age"].fillna(test["Age"].median())
test["Embarked"] = test["Embarked"].fillna("S")

# Fill gaps in any other numeric column with that column's own median.
# A NaN left in a feature is not removed by the later min-max scaling and would
# reach model.fit / model.predict as NaN. Columns without gaps are unaffected.
train = train.fillna(train.median(numeric_only=True))
test = test.fillna(test.median(numeric_only=True))

In [None]:
# Convert the string categories to integer codes.
# The whole column is reassigned instead of writing through chained indexing
# (frame["col"][mask] = value), which raises SettingWithCopyWarning and does
# nothing at all when pandas Copy-on-Write is active.
SEX_CODES = {"male": 0, "female": 1}
EMBARKED_CODES = {"S": 0, "C": 1, "Q": 2}

for frame in (train, test):
    # Values that are not in the mapping (including already-converted codes)
    # are left untouched, so re-running this cell is harmless.
    frame["Sex"] = frame["Sex"].map(lambda value: SEX_CODES.get(value, value))
    frame["Embarked"] = frame["Embarked"].map(lambda value: EMBARKED_CODES.get(value, value))

# Only the last expression of a cell is rendered, so this first preview is not shown
train.head(10)

test.head(10)

In [None]:
# Drop the label itself (Survived) and the columns that are not used as features:
# Name, Ticket and PassengerId are unique per passenger, and Cabin is dropped
# because the cabin number does not tell us where the cabin was located.
train = train.drop(["Name","Cabin","Ticket","PassengerId","Survived"], axis=1)
# Only the last expression of a cell is rendered, so this preview is not shown
train.head()

# The test data has no Survived column, so only the four feature columns are dropped
test = test.drop(["Name","Cabin","Ticket","PassengerId"], axis=1)
test.head()

In [None]:
#非数値特徴をバイナリベクトルに変換する
from keras.utils import np_utils
P_train = np_utils.to_categorical(train["Pclass"],4)

P_test = np_utils.to_categorical(test["Pclass"],4)

In [None]:
#P_trainとP_testをnumpy型からpandas型に変換する
P_train = pd.DataFrame(P_train)
P_test = pd.DataFrame(P_test)

In [None]:
P_train.head()

In [None]:
P_test.head()

In [None]:
#情報量のない0列を削除する
P_train = P_train.drop([0], axis=1)
P_test = P_test.drop([0], axis=1)

In [None]:
P_train.head()

In [None]:
P_test.head()

In [None]:
#P_trainの列に名前をつける
P_train = P_train.rename(columns={1: 'Pclass_vec1',2:'Pclass_vec2',3:'Pclass_vec3'})
P_test = P_test.rename(columns={1: 'Pclass_vec1',2:'Pclass_vec2',3:'Pclass_vec3'})

In [None]:
P_train.head()

In [None]:
P_test.head()

In [None]:
#非数値特徴を持っていたPclass列を削除する
train = train.drop(["Pclass"], axis=1)
train = pd.concat([train, P_train], axis=1)
train.head()

In [None]:
#非数値特徴を持っていたPclass列を削除する
test = test.drop(["Pclass"], axis=1)
test = pd.concat([test, P_test], axis=1)
test.head()

In [None]:
print(train.shape)
print(test.shape)

In [None]:
def num2vec(col_name, data, num_classes=None):
    """One-hot encode an integer-coded column of a DataFrame.

    The encoding is done with NumPy directly, so this helper does not depend
    on the Keras ``np_utils`` module.

    Args:
        col_name: Name of the column in ``data`` holding integer class codes
            (0, 1, 2, ...). Values are cast to ``int`` before encoding.
        data: DataFrame that contains ``col_name``.
        num_classes: Width of the one-hot vectors. When omitted it is taken
            from the data as ``max code + 1``. Pass it explicitly when two
            frames (e.g. training and test data) must end up with the same
            number of columns even if one of them lacks the highest code.

    Returns:
        A new DataFrame of float32 zeros and ones with one row per input row,
        a fresh default integer index, and columns labelled
        ``0 .. num_classes - 1``.
    """
    codes = np.asarray(data[col_name], dtype=int)
    if num_classes is None:
        # An empty column has no maximum; encode it as zero columns instead of failing
        num_classes = int(codes.max()) + 1 if codes.size else 0
    one_hot = np.zeros((codes.size, num_classes), dtype="float32")
    # Set exactly one entry per row: the column whose index equals the row's code
    one_hot[np.arange(codes.size), codes] = 1.0
    return pd.DataFrame(one_hot)

In [None]:
#Sexの非数値特徴をバイナリベクトルに変換する
Sex_train = num2vec("Sex",train)
Sex_train = Sex_train.rename(columns={0: '男性',1:'女性'})

In [None]:
#Sexの非数値特徴をバイナリベクトルに変換する
Sex_test = num2vec("Sex",test)
Sex_test = Sex_test.rename(columns={0: '男性',1:'女性'})

In [None]:
#Embarkedの非数値特徴をバイナリベクトルに変換する
Embarked_train = num2vec("Embarked",train)
Embarked_train = Embarked_train.rename(columns={0: 'Cherbourg',1:'Queenstown',2:'Southampton'})

In [None]:
#Embarkedの非数値特徴をバイナリベクトルに変換する
Embarked_test = num2vec("Embarked",test)
Embarked_test = Embarked_test.rename(columns={0: 'Cherbourg',1:'Queenstown',2:'Southampton'})

In [None]:
Sex_train.head()

In [None]:
Sex_test.head()

In [None]:
Embarked_train.head()

In [None]:
Embarked_test.head()

In [None]:
#バイナリベクトルに変換したSexとEmbarkedをトレーニングデータに加える
train = train.drop(["Sex"], axis=1)
train = train.drop(["Embarked"], axis=1)
train = pd.concat([train, Sex_train,Embarked_train], axis=1)
train.head()

In [None]:
#バイナリベクトルに変換したSexとEmbarkedをトレーニングデータに加える
test = test.drop(["Sex"], axis=1)
test = test.drop(["Embarked"], axis=1)
test = pd.concat([test, Sex_test,Embarked_test], axis=1)
test.head()

In [None]:
#トレーニングデータを正規化する
train = (train - train.min()) / (train.max() - train.min())
train.head()

In [None]:
#トレーニングデータを正規化する
test = (test - test.min()) / (test.max() - test.min())
test.head()

In [None]:
#教師データをバイナリベクトルに変換する
Y_train = np_utils.to_categorical(Y_train,2)
Y_train

In [None]:
#MLP(3層のニューラルネットワーク)を作成する
from keras.callbacks import EarlyStopping
early = EarlyStopping(monitor = 'val_loss',patience = 5,verbose=1)

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense,Activation
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.models import load_model
    
from keras.layers.core import Dropout

n_in = len(train.columns)
n_out = len(Y_train[0])

#中間総数
n_hidden = 100
#学習回数
epochs = 10
#バッチサイズ
batch_size = 10

model = Sequential()
model.add(Dense(n_hidden,input_dim=n_in))
model.add(Activation('sigmoid'))
model.add(Dropout(0.5))

model.add(Dense(output_dim=n_out))
model.add(Activation('softmax'))   
    
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])

history = model.fit(train,Y_train,epochs = epochs,batch_size = batch_size,callbacks=[early])

In [None]:
#テストデータの予測を行う
prob = model.predict(test)
prob

In [None]:
#テストデータの予測を1次元に変換する
ans = [0 if prob[i][0] > prob[i][1] else 1 for i in range(len(prob))]

In [None]:
ans

In [None]:
test = pd.read_csv("./test.csv")
# PassengerIdを取得
PassengerId = np.array(test["PassengerId"]).astype(int)
PassengerId

In [None]:
# my_prediction(予測データ）とPassengerIdをデータフレームへ落とし込む
my_solution = pd.DataFrame(ans, PassengerId, columns = ["Survived"])

# csvとして書き出す
my_solution.to_csv("titanicNN.csv", index_label = ["PassengerId"])