## 載入需要的模組

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

import pandas as pd

## 讀入資料

In [None]:
# Load the training split, the test split, and the sample submission file
# from the current working directory.
train = pd.read_csv(filepath_or_buffer="train.csv")
test = pd.read_csv(filepath_or_buffer="test.csv")
submit = pd.read_csv(filepath_or_buffer="gender_submission.csv")

## 先看一下資料內容

In [None]:
# Preview the first rows of the training frame.
train.head()

## 看看缺失值的狀況

In [None]:
# Print the column summary of the training frame to spot missing values.
train.info()

In [None]:
# Print the column summary of the test frame to spot missing values.
test.info()

## 看看資料的分布狀況

In [None]:
# Summary statistics of the training frame, to inspect its distribution.
train.describe()

In [None]:
# Summary statistics of the test frame, to inspect its distribution.
test.describe()

## 從上面觀察的結果，選取自己想要的特徵

In [None]:
# Columns taken from the raw frames and used as model inputs.
selected_features = [
    'Pclass',
    'Age',
    'SibSp',
    'Parch',
    'Fare',
    'Sex',
    'Embarked',
]

In [None]:
# Training inputs: only the selected feature columns of the training frame.
X_train = train[selected_features]

In [None]:
# Training target: the 'Survived' column of the training frame.
y_train = train['Survived']

In [None]:
# Test inputs: the same selected feature columns, taken from the test frame.
X_test = test[selected_features]

## 處理缺失值

In [None]:
# Column summary of the training features before missing values are filled.
X_train.info()

In [None]:
# Column summary of the test features before missing values are filled.
X_test.info()

In [None]:
# Fill missing values by reassigning the frames rather than calling
# `fillna(..., inplace=True)` on a selected column. That chained form operates
# on a temporary Series: it triggers SettingWithCopyWarning on older pandas
# and leaves the frame unchanged when copy-on-write is active.
# Fill values are the same as before: column means for the numeric columns
# and 'S' for the missing 'Embarked' entries.
X_train = X_train.fillna({'Age': X_train['Age'].mean(), 'Embarked': 'S'})
X_test = X_test.fillna({'Age': X_test['Age'].mean(), 'Fare': X_test['Fare'].mean()})

In [None]:
# Column summary of the training features after missing values were filled.
X_train.info()

## 將字串類型的類別資料做轉換

In [None]:
# One-hot encode the string-valued columns of the training features.
# dtype=float keeps every column numeric: newer pandas emits boolean dummy
# columns by default, and a frame mixing bool and float columns is converted
# to an object array when it is handed to Keras.
X_train = pd.get_dummies(X_train, dtype=float)

In [None]:
# Preview the encoded training features.
X_train.head()

In [None]:
# One-hot encode the test features, then align them to the training columns.
# Encoding each frame independently yields a different column set whenever a
# category appears in only one of them; reindexing adds any missing dummy
# column (filled with 0), drops unseen ones, and fixes the column order.
# Assumes X_train has already been one-hot encoded at this point.
# dtype=float keeps the dummy columns numeric instead of boolean.
X_test = pd.get_dummies(X_test, dtype=float).reindex(
    columns=X_train.columns, fill_value=0.0
)

In [None]:
# Preview the encoded test features.
X_test.head()

## 使用 MLP 來做分類

In [None]:
import keras

In [None]:
# Build a small fully connected binary classifier: two ReLU hidden layers
# (40 and 30 units) followed by a single sigmoid output unit, trained with
# binary cross-entropy and the Adam optimizer, reporting accuracy.
# The input width is read from X_train instead of being hard-coded to 10, so
# the model keeps matching the data when the feature list or the set of
# dummy columns changes.
n_features = X_train.shape[1]
model = keras.models.Sequential()
model.add(keras.layers.Dense(40, input_dim=n_features, activation='relu'))
model.add(keras.layers.Dense(30, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

## 訓練模型

In [None]:
# Train for 30 epochs, holding out 10% of the training rows for validation.
model.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    validation_split=0.1,
)

## 測試模型

In [None]:
# `y_test` was never defined, so the original call failed with NameError.
# The labels are taken from the sample submission frame loaded earlier.
# NOTE(review): assumes gender_submission.csv has a 'Survived' column aligned
# row-for-row with test.csv; these are presumably baseline labels rather than
# ground truth, so the reported score only measures agreement with them -
# confirm before relying on it.
y_test = submit['Survived']
model.evaluate(X_test, y_test)

## 用訓練好的模型來做預測

In [None]:
# `Sequential.predict_classes` is no longer available in newer Keras
# releases. For a single sigmoid output the equivalent is thresholding the
# predicted probability at 0.5, which yields 0/1 labels of shape (n_rows, 1).
survived_predict = (model.predict(X_test) > 0.5).astype('int32')

In [None]:
# Show the first column of the 2-D prediction array as a flat vector.
survived_predict[:, 0]

## 將結果輸出成規定的格式

In [None]:
# Assemble the submission table: one predicted label per test passenger.
# The prediction array is 2-D (one column), and a DataFrame column must be
# 1-D, so it is flattened first; passing it as-is raises an error.
submission = pd.DataFrame({
    'PassengerId': test['PassengerId'],
    'Survived': survived_predict.ravel(),
})

In [None]:
# Write the submission table to disk without the index column.
submission.to_csv(path_or_buf='submission.csv', index=False)