# 載入模組(使用Keras)

In [15]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn import preprocessing
import pandas as pd
import numpy as np

In [293]:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
submit = pd.read_csv('gender_submission.csv')

# 觀察資料

In [294]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [295]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Name           891 non-null object
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Ticket         891 non-null object
Fare           891 non-null float64
Cabin          204 non-null object
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB


In [296]:
test.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [297]:
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 11 columns):
PassengerId    418 non-null int64
Pclass         418 non-null int64
Name           418 non-null object
Sex            418 non-null object
Age            332 non-null float64
SibSp          418 non-null int64
Parch          418 non-null int64
Ticket         418 non-null object
Fare           417 non-null float64
Cabin          91 non-null object
Embarked       418 non-null object
dtypes: float64(2), int64(4), object(5)
memory usage: 36.0+ KB


# 選擇需要的資料欄位

In [298]:
selected_features = ['Pclass', 'Age',  'Fare', 'Sex', 'Embarked']

In [299]:
X_train = train[selected_features]

In [300]:
y_train = train['Survived']

In [301]:
X_test = test[selected_features]

In [302]:
X_train['Age'].fillna(X_train['Age'].mean(), inplace=True)
X_train['Embarked'].fillna('S', inplace=True)
X_test['Age'].fillna(X_test['Age'].mean(), inplace=True)
X_test['Fare'].fillna(X_test['Fare'].mean(), inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)


# 設置虛擬變數

In [303]:
X_train = pd.get_dummies(X_train)

In [304]:
X_test = pd.get_dummies(X_test)

In [305]:
X_test.head()

Unnamed: 0,Pclass,Age,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,3,34.5,7.8292,0,1,0,1,0
1,3,47.0,7.0,1,0,0,0,1
2,2,62.0,9.6875,0,1,0,1,0
3,3,27.0,8.6625,0,1,0,0,1
4,3,22.0,12.2875,1,0,0,0,1


In [306]:
X_train.head()

Unnamed: 0,Pclass,Age,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,3,22.0,7.25,0,1,0,0,1
1,1,38.0,71.2833,1,0,1,0,0
2,3,26.0,7.925,1,0,0,0,1
3,1,35.0,53.1,1,0,0,0,1
4,3,35.0,8.05,0,1,0,0,1


# 標準化

In [274]:
minmax_scale = preprocessing.MinMaxScaler(feature_range=(0 ,1))

In [275]:
X_train = minmax_scale.fit_transform(X_train)

In [276]:
X_train[:2]#參數皆介於零到一之間

array([[1.        , 0.27117366, 0.01415106, 0.        , 1.        ,
        0.        , 0.        , 1.        ],
       [0.        , 0.4722292 , 0.13913574, 1.        , 0.        ,
        1.        , 0.        , 0.        ]])

In [277]:
y_train = y_train.values

In [278]:
X_test = minmax_scale.fit_transform(X_test)

# 建立模型

In [307]:
model = Sequential()

In [308]:
model.add(Dense(units=40, input_dim=8,
               kernel_initializer="uniform",
               activation="relu"))

In [309]:
model.add(Dense(units=30, 
               kernel_initializer="uniform",
               activation="relu"))

In [310]:
model.add(Dense(units=1, 
               kernel_initializer="uniform",
               activation="sigmoid"))

In [311]:
model.compile(loss="binary_crossentropy",
             optimizer="adam",metrics=["accuracy"])

In [312]:
train_history=model.fit(x=X_train,
                       y=y_train,
                       validation_split=0.1,
                       epochs=50,batch_size=30,
                       verbose=2)

Train on 801 samples, validate on 90 samples
Epoch 1/50
 - 0s - loss: 0.6811 - acc: 0.6180 - val_loss: 0.6632 - val_acc: 0.6556
Epoch 2/50
 - 0s - loss: 0.6467 - acc: 0.6792 - val_loss: 0.6181 - val_acc: 0.6444
Epoch 3/50
 - 0s - loss: 0.6191 - acc: 0.6704 - val_loss: 0.5827 - val_acc: 0.6778
Epoch 4/50
 - 0s - loss: 0.6023 - acc: 0.6742 - val_loss: 0.5626 - val_acc: 0.6889
Epoch 5/50
 - 0s - loss: 0.5911 - acc: 0.6904 - val_loss: 0.5477 - val_acc: 0.6667
Epoch 6/50
 - 0s - loss: 0.5805 - acc: 0.6891 - val_loss: 0.5296 - val_acc: 0.7000
Epoch 7/50
 - 0s - loss: 0.5727 - acc: 0.6954 - val_loss: 0.5214 - val_acc: 0.7111
Epoch 8/50
 - 0s - loss: 0.5621 - acc: 0.7041 - val_loss: 0.5079 - val_acc: 0.7333
Epoch 9/50
 - 0s - loss: 0.5501 - acc: 0.6991 - val_loss: 0.4959 - val_acc: 0.7333
Epoch 10/50
 - 0s - loss: 0.5395 - acc: 0.7216 - val_loss: 0.4933 - val_acc: 0.7556
Epoch 11/50
 - 0s - loss: 0.5278 - acc: 0.7416 - val_loss: 0.4792 - val_acc: 0.8000
Epoch 12/50
 - 0s - loss: 0.5138 - acc: 

In [313]:
scores = model.evaluate(X_train, y_train)



In [314]:
scores[1]

0.8204264866248794

In [315]:
survived_predict= model.predict(X_test)

In [316]:
survived_predict

array([[0.09383187],
       [0.37162837],
       [0.10720596],
       [0.08764999],
       [0.45182556],
       [0.18946758],
       [0.6011451 ],
       [0.15128027],
       [0.55957156],
       [0.10707468],
       [0.08201936],
       [0.19749965],
       [0.91421914],
       [0.09235413],
       [0.9299372 ],
       [0.832945  ],
       [0.15511312],
       [0.13452967],
       [0.42700577],
       [0.46920645],
       [0.31067377],
       [0.3747485 ],
       [0.9199188 ],
       [0.34753826],
       [0.9244136 ],
       [0.05721897],
       [0.9491669 ],
       [0.13070926],
       [0.21698686],
       [0.11455628],
       [0.10916533],
       [0.15845303],
       [0.4395934 ],
       [0.45406199],
       [0.35236576],
       [0.1411064 ],
       [0.41785306],
       [0.44619146],
       [0.0912644 ],
       [0.09653448],
       [0.09705956],
       [0.23662116],
       [0.06581914],
       [0.704183  ],
       [0.92834294],
       [0.09124987],
       [0.2758208 ],
       [0.101

In [317]:
survived_predict = np.array(survived_predict)
survived_predict = np.round(survived_predict)

In [318]:
survived_predict

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],

In [319]:
submission=pd.DataFrame({'PassengerId':test['PassengerId'], 'Survived':survived_predict[0:418,0]})

In [321]:
submission.to_csv('submission1.csv', index=False)