# 1、导入模块

In [1]:
from __future__ import print_function
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.optimizers import SGD, RMSprop, Adam
from keras.layers import Dense, Activation, Dropout

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# 2、读取数据

In [2]:
raw_train = pd.read_csv('./train.csv', index_col=0)
raw_train['is_test'] = 0
raw_test = pd.read_csv('./test.csv', index_col=0)
raw_test['is_test'] = 1

all_data = pd.concat((raw_train, raw_test), axis=0)


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  


# 3、对数据进行预处理

In [3]:
def get_title_last_name(name):
    full_name = name.str.split(', ', n=0, expand=True)
    last_name = full_name[0]
    titles = full_name[1].str.split('.', n=0, expand=True)
    titles = titles[0]
    return(titles)

def get_titles_from_names(df):
    df['Title'] = get_title_last_name(df['Name'])
    df = df.drop(['Name'], axis=1)
    return(df)

def get_dummy_cats(df):
    return(pd.get_dummies(df, columns=['Title', 'Pclass', 'Sex', 'Embarked',
                                       'Cabin', 'Cabin_letter']))

def get_cabin_letter(df):    
    df['Cabin'].fillna('Z', inplace=True)
    df['Cabin_letter'] = df['Cabin'].str[0]    
    return(df)

def process_data(df):
    # preprocess titles, cabin, embarked
    df = get_titles_from_names(df)    
    df['Embarked'].fillna('S', inplace=True)
    df = get_cabin_letter(df)
    
    # drop remaining features
    df = df.drop(['Ticket', 'Fare'], axis=1)
    
    # create dummies for categorial features
    df = get_dummy_cats(df)
    
    return(df)

proc_data = process_data(all_data)
proc_train = proc_data[proc_data['is_test'] == 0]
proc_test = proc_data[proc_data['is_test'] == 1]

In [4]:
for_age_train = proc_data.drop(['Survived', 'is_test'], axis=1).dropna(axis=0)
X_train_age = for_age_train.drop('Age', axis=1)
y_train_age = for_age_train['Age']

In [5]:
train_data = proc_train


In [6]:
train_data['Age'].loc[train_data['Age'].isnull()] = train_data.Age.median()
#中位数补充缺失值

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [7]:
test_data = proc_test
to_pred = test_data.loc[test_data['Age'].isnull()].drop(
          ['Age', 'Survived', 'is_test'], axis=1)

#test_data['Age'].loc[test_data['Age'].isnull()] = p

In [8]:
y = pd.get_dummies(train_data['Survived'])


In [9]:
X = train_data.drop(['Survived', 'is_test'], axis=1)


# 4、创建模型

In [10]:
# create model
model = Sequential()
model.add(Dense(input_dim=X.shape[1], units=128,
                 kernel_initializer='normal', bias_initializer='zeros'))
model.add(Activation('relu'))

for i in range(0, 15):
    model.add(Dense(units=128, kernel_initializer='normal',
                     bias_initializer='zeros'))
    model.add(Activation('relu'))
    model.add(Dropout(.40))

model.add(Dense(units=2))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [11]:
model.fit(X.values, y.values, epochs=600, verbose=2)


Epoch 1/600
 - 1s - loss: 0.6753 - acc: 0.6139
Epoch 2/600
 - 0s - loss: 0.6717 - acc: 0.6162
Epoch 3/600
 - 0s - loss: 0.6669 - acc: 0.6162
Epoch 4/600
 - 0s - loss: 0.6562 - acc: 0.6162
Epoch 5/600
 - 0s - loss: 0.6618 - acc: 0.6162
Epoch 6/600
 - 0s - loss: 0.6080 - acc: 0.6308
Epoch 7/600
 - 0s - loss: 0.5504 - acc: 0.7935
Epoch 8/600
 - 0s - loss: 0.5763 - acc: 0.7901
Epoch 9/600
 - 0s - loss: 0.5136 - acc: 0.8070
Epoch 10/600
 - 0s - loss: 0.5174 - acc: 0.8182
Epoch 11/600
 - 0s - loss: 0.5178 - acc: 0.8047
Epoch 12/600
 - 0s - loss: 0.4714 - acc: 0.8339
Epoch 13/600
 - 0s - loss: 0.4707 - acc: 0.8316
Epoch 14/600
 - 0s - loss: 0.4717 - acc: 0.8440
Epoch 15/600
 - 0s - loss: 0.4578 - acc: 0.8474
Epoch 16/600
 - 0s - loss: 0.4481 - acc: 0.8507
Epoch 17/600
 - 0s - loss: 0.4227 - acc: 0.8575
Epoch 18/600
 - 0s - loss: 0.4473 - acc: 0.8474
Epoch 19/600
 - 0s - loss: 0.4258 - acc: 0.8586
Epoch 20/600
 - 0s - loss: 0.4392 - acc: 0.8440
Epoch 21/600
 - 0s - loss: 0.4448 - acc: 0.8350
E

Epoch 171/600
 - 0s - loss: 0.3046 - acc: 0.8945
Epoch 172/600
 - 0s - loss: 0.3027 - acc: 0.8945
Epoch 173/600
 - 0s - loss: 0.3117 - acc: 0.8900
Epoch 174/600
 - 0s - loss: 0.3095 - acc: 0.8923
Epoch 175/600
 - 0s - loss: 0.3125 - acc: 0.8866
Epoch 176/600
 - 0s - loss: 0.3041 - acc: 0.8934
Epoch 177/600
 - 0s - loss: 0.3131 - acc: 0.8855
Epoch 178/600
 - 0s - loss: 0.3136 - acc: 0.8833
Epoch 179/600
 - 0s - loss: 0.3063 - acc: 0.8810
Epoch 180/600
 - 0s - loss: 0.2891 - acc: 0.9001
Epoch 181/600
 - 0s - loss: 0.4088 - acc: 0.8350
Epoch 182/600
 - 0s - loss: 0.4734 - acc: 0.7980
Epoch 183/600
 - 0s - loss: 0.4189 - acc: 0.8328
Epoch 184/600
 - 0s - loss: 0.3687 - acc: 0.8664
Epoch 185/600
 - 0s - loss: 0.3789 - acc: 0.8732
Epoch 186/600
 - 0s - loss: 0.3332 - acc: 0.8765
Epoch 187/600
 - 0s - loss: 0.3110 - acc: 0.8844
Epoch 188/600
 - 0s - loss: 0.4022 - acc: 0.8373
Epoch 189/600
 - 0s - loss: 0.3657 - acc: 0.8608
Epoch 190/600
 - 0s - loss: 0.3228 - acc: 0.8810
Epoch 191/600
 - 0s 

 - 0s - loss: 0.4212 - acc: 0.8204
Epoch 339/600
 - 0s - loss: 0.3614 - acc: 0.8597
Epoch 340/600
 - 0s - loss: 0.3036 - acc: 0.8900
Epoch 341/600
 - 0s - loss: 0.3047 - acc: 0.8923
Epoch 342/600
 - 0s - loss: 0.3107 - acc: 0.8934
Epoch 343/600
 - 0s - loss: 0.3040 - acc: 0.8889
Epoch 344/600
 - 0s - loss: 0.3022 - acc: 0.8866
Epoch 345/600
 - 0s - loss: 0.3088 - acc: 0.8889
Epoch 346/600
 - 0s - loss: 0.3005 - acc: 0.8956
Epoch 347/600
 - 0s - loss: 0.2980 - acc: 0.8889
Epoch 348/600
 - 0s - loss: 0.3019 - acc: 0.8911
Epoch 349/600
 - 0s - loss: 0.2873 - acc: 0.8979
Epoch 350/600
 - 0s - loss: 0.3037 - acc: 0.8934
Epoch 351/600
 - 0s - loss: 0.3014 - acc: 0.8934
Epoch 352/600
 - 0s - loss: 0.2899 - acc: 0.9001
Epoch 353/600
 - 0s - loss: 0.2934 - acc: 0.8956
Epoch 354/600
 - 0s - loss: 0.2875 - acc: 0.8911
Epoch 355/600
 - 0s - loss: 0.3065 - acc: 0.8956
Epoch 356/600
 - 0s - loss: 0.2966 - acc: 0.8900
Epoch 357/600
 - 0s - loss: 0.2994 - acc: 0.8956
Epoch 358/600
 - 0s - loss: 0.3110

Epoch 506/600
 - 0s - loss: 0.3059 - acc: 0.8934
Epoch 507/600
 - 0s - loss: 0.2947 - acc: 0.8923
Epoch 508/600
 - 0s - loss: 0.2908 - acc: 0.8934
Epoch 509/600
 - 0s - loss: 0.2808 - acc: 0.8945
Epoch 510/600
 - 0s - loss: 0.3049 - acc: 0.8923
Epoch 511/600
 - 0s - loss: 0.2981 - acc: 0.8934
Epoch 512/600
 - 0s - loss: 0.2880 - acc: 0.9012
Epoch 513/600
 - 0s - loss: 0.3076 - acc: 0.8777
Epoch 514/600
 - 0s - loss: 0.2942 - acc: 0.8855
Epoch 515/600
 - 0s - loss: 0.2927 - acc: 0.8878
Epoch 516/600
 - 0s - loss: 0.2934 - acc: 0.8900
Epoch 517/600
 - 0s - loss: 0.2942 - acc: 0.8934
Epoch 518/600
 - 0s - loss: 0.2897 - acc: 0.8934
Epoch 519/600
 - 0s - loss: 0.2820 - acc: 0.8979
Epoch 520/600
 - 0s - loss: 0.2930 - acc: 0.8934
Epoch 521/600
 - 0s - loss: 0.2932 - acc: 0.8934
Epoch 522/600
 - 0s - loss: 0.3355 - acc: 0.9012
Epoch 523/600
 - 0s - loss: 0.3102 - acc: 0.8833
Epoch 524/600
 - 0s - loss: 0.3105 - acc: 0.8687
Epoch 525/600
 - 0s - loss: 0.2833 - acc: 0.8979
Epoch 526/600
 - 0s 

<keras.callbacks.History at 0x2879871d8d0>

In [18]:
p_survived = model.predict_classes(test_data.drop(['Survived', 'is_test'], axis=1).values)


In [19]:
general = pd.read_csv('gender_submission.csv')
data = {
    'PassengerId':general['PassengerId'],
    'Survived':p_survived
}


res = pd.DataFrame(data)

res.to_csv('result_3.csv',index=False,sep=',')

## 准确率： 0.76076