In [0]:
'''
データのロード
'''
import pickle

# Location of the pickled, preprocessed Titanic arrays.
pickle_file = "./titanic_data_forDNN.pickle"

# The file holds three objects written back to back; they are read in the same
# order: training features, training labels, test features.
# NOTE(review): pickle.load executes arbitrary code from the file — only load
# pickles you produced yourself.
with open(pickle_file, 'rb') as f:
    X_train, Y_train, X_test = [pickle.load(f) for _ in range(3)]

In [0]:
'''
モデルの構築
'''
# Registry of (name, estimator) tuples; each later cell appends one entry and
# the training cell iterates over the whole list.
models = []

In [0]:
# NN
from keras import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier

def create_baseline(input_dim = 18, dropout_rate = 0.2):
    """Build and compile the baseline feed-forward binary classifier.

    Architecture: Dense(13, relu) -> Dropout -> Dense(8, relu) -> Dense(1, sigmoid),
    compiled with binary cross-entropy loss, the Adam optimizer and an
    accuracy metric.

    Parameters
    ----------
    input_dim : int, default 18
        Number of input features expected by the first layer. It must equal
        the number of columns in the training matrix.
    dropout_rate : float, default 0.2
        Rate passed to the Dropout layer that follows the first hidden layer.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    # create model
    model = Sequential()
    model.add(Dense(13, input_dim = input_dim, activation = 'relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(8, activation = 'relu'))
    # Single sigmoid unit: the output is a probability-like score in [0, 1].
    model.add(Dense(1, activation = 'sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
    return model

# Wrap the Keras model factory in the scikit-learn style classifier wrapper so
# it can be registered alongside the other estimators.
estimator_nn = KerasClassifier(
    build_fn = create_baseline,
    epochs = 20,
    batch_size = 10,
    verbose = 1,
)
models.append(("NN", estimator_nn))

In [0]:
# SVC
from sklearn.svm import SVC

# Support-vector classifier constructed with no arguments (library defaults),
# registered under the name "SVM".
estimator_svc = SVC()
models.append(("SVM", estimator_svc))

In [0]:
# XGBoost
import xgboost as xgb

# XGBoost classifier with explicit tree depth, learning rate and estimator
# count, registered under the name "XGB".
estimator_xgb = xgb.XGBClassifier(
    max_depth=3,
    learning_rate=0.1,
    n_estimators=100,
)
models.append(("XGB", estimator_xgb))

In [0]:
# RandomForest
from sklearn.ensemble import RandomForestClassifier

# Random forest with library-default hyperparameters. The seed is fixed so the
# cross-validation scores are reproducible from one run to the next; without
# it every run builds a different forest.
estimator_rfc = RandomForestClassifier(random_state = 42)
models.append(("RFC",estimator_rfc))

In [0]:
# kfold
from sklearn.model_selection import StratifiedKFold

# 5-fold stratified splitter without shuffling. `random_state` is deliberately
# not passed: it has no effect when shuffle is False, and recent scikit-learn
# releases raise ValueError for that combination. The splits are deterministic
# as they are.
kfold = StratifiedKFold(n_splits = 5, shuffle = False)

In [0]:
# Training: cross-validate every registered model
from sklearn.model_selection import cross_val_score
# Cross-validate every registered estimator on the training data. `names` and
# `results` are parallel lists: results[k] holds the accuracy scores returned
# by cross_val_score for the model called names[k].
names, results = [], []

for name, model in models:
    fold_scores = cross_val_score(model, X_train, Y_train, scoring = "accuracy", cv = kfold)
    results.append(fold_scores)
    names.append(name)

In [0]:
'''
評価
'''
# Report, for each model, the mean and standard deviation of its
# cross-validation accuracy as percentages rounded to one decimal place.
for model_name, fold_scores in zip(names, results):
    print(model_name)
    mean_pct = round(fold_scores.mean()*100, 1)
    std_pct = round(fold_scores.std()*100, 1)
    print('mean acc :', mean_pct, 'standard deviation :', std_pct)
    

In [0]:
'''
テスト
'''
# Fit the neural network on the full training set, then predict on the test
# features. The prediction array is converted to a plain Python list for the
# submission cell.
estimator_nn.fit(X_train, Y_train, epochs = 20, batch_size = 10)
test_output = estimator_nn.predict(X_test)
prediction = test_output.tolist()

In [0]:
'''
提出
'''
import pandas as pd

# Attach the network's predictions to the rows of test.csv.
data_check = pd.read_csv("test.csv")

# A row-count mismatch would otherwise be hidden: unmatched rows become NaN
# after index alignment and NaN >= 0.5 is False, i.e. they would silently be
# labelled 0.
if len(prediction) != len(data_check):
    raise ValueError(
        "prediction has %d rows but test.csv has %d" % (len(prediction), len(data_check))
    )

# `prediction` is a list with one single-element list per row, so .str.get(0)
# unwraps each inner list to its scalar value.
se = pd.Series(prediction)
data_check['check'] = se.str.get(0)

# Threshold at 0.5 to obtain the final 0/1 label (vectorised; same result as
# looping over the column and appending 1 or 0).
data_check['final'] = (data_check['check'] >= 0.5).astype(int)

# Count the rows of data_check whose values at column positions 1 and 3 are
# equal (match) or different (nomatch).
# NOTE(review): neither counter is read anywhere else in the visible notebook,
# and the comparison is purely positional — confirm that columns 1 and 3 are
# the intended pair, or drop this check.
match, nomatch = 0, 0
for row in data_check.values:
    if row[1] == row[3]:
        match += 1
    else:
        nomatch += 1

####################################
# Submission
#################################### 

# Build the two-column submission: PassengerId read from test.csv plus the
# thresholded labels, written without the index column.
passenger_ids = pd.read_csv("test.csv")['PassengerId']
temp = pd.DataFrame(passenger_ids)
temp['Survived'] = data_check['final']
temp.to_csv("submission.csv", index = False)

Reference: https://www.kaggle.com/vincentlugat/titanic-neural-networks-keras-81-8/code