# Titanic: Machine Learning from Disaster

https://www.kaggle.com/c/titanic


In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
tf.test.gpu_device_name()

'/device:GPU:0'

## Load Dataset

In [0]:
# Code to read csv file into colaboratory:
!pip install -U -q PyDrive
 
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
 
# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [3]:
# 2. Get the file info in the folder 
# file_list = drive.ListFile({'q': "'<FOLDER ID>' in parents and trashed=false"}).GetList()
file_list = drive.ListFile({'q': "'1ETeRvrK-KspSRVPeHdKqKAXJ1tGJ_TFd' in parents and trashed=false"}).GetList()
for file1 in file_list:
  print('title: %s, id: %s' % (file1['title'], file1['id']))

title: train.csv, id: 1vD6LWZJkYZ6RJvpDq8ZkU0xhBMBXgz-u
title: test.csv, id: 1Vl06bTerImNMpoYc3zKY_9dvE3Ksf3Ss


In [0]:
#3. Get the file
# downloaded = drive.CreateFile({'id':'<FILE_ID>'})
# downloaded.GetContentFile('<FILE_TITLE>')

train_downloaded = drive.CreateFile({'id':'1vD6LWZJkYZ6RJvpDq8ZkU0xhBMBXgz-u'})
train_downloaded.GetContentFile('train.csv') 

test_downloaded = drive.CreateFile({'id':'1Vl06bTerImNMpoYc3zKY_9dvE3Ksf3Ss'})
test_downloaded.GetContentFile('test.csv') 

In [5]:
train = pd.read_csv("train.csv", index_col=["PassengerId"])
train.head()

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [6]:
test = pd.read_csv("test.csv", index_col=["PassengerId"])
test.head()

Unnamed: 0_level_0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


## Missing data

In [7]:
train.isnull().sum()

Survived      0
Pclass        0
Name          0
Sex           0
Age         177
SibSp         0
Parch         0
Ticket        0
Fare          0
Cabin       687
Embarked      2
dtype: int64

In [8]:
test.isnull().sum()

Pclass        0
Name          0
Sex           0
Age          86
SibSp         0
Parch         0
Ticket        0
Fare          1
Cabin       327
Embarked      0
dtype: int64

In [9]:
all_data = pd.concat([train, test])
all_data.head()

Unnamed: 0_level_0,Age,Cabin,Embarked,Fare,Name,Parch,Pclass,Sex,SibSp,Survived,Ticket
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,22.0,,S,7.25,"Braund, Mr. Owen Harris",0,3,male,1,0.0,A/5 21171
2,38.0,C85,C,71.2833,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,1,female,1,1.0,PC 17599
3,26.0,,S,7.925,"Heikkinen, Miss. Laina",0,3,female,0,1.0,STON/O2. 3101282
4,35.0,C123,S,53.1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,1,female,1,1.0,113803
5,35.0,,S,8.05,"Allen, Mr. William Henry",0,3,male,0,0.0,373450


### 1) Embarked

In [10]:
train[train["Embarked"].isnull()]

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
62,1,1,"Icard, Miss. Amelie",female,38.0,0,0,113572,80.0,B28,
830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62.0,0,0,113572,80.0,B28,


In [11]:
# 1등석이고 요금이 75이상 85인 데이터의 탑승지
all_data[(all_data["Pclass"] == 1) & (all_data["Fare"] > 75) & (all_data["Fare"] < 85)].groupby("Embarked").agg(['mean', 'count'])

Unnamed: 0_level_0,Age,Age,Fare,Fare,Parch,Parch,Pclass,Pclass,SibSp,SibSp,Survived,Survived
Unnamed: 0_level_1,mean,count,mean,count,mean,count,mean,count,mean,count,mean,count
Embarked,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
C,44.478261,23,79.151168,25,0.32,25,1,25,0.48,25,0.8125,16
S,38.611111,18,80.056011,18,0.555556,18,1,18,0.666667,18,0.615385,13


In [12]:
train["Embarked"] = train["Embarked"].fillna("C")
train.isnull().sum()

Survived      0
Pclass        0
Name          0
Sex           0
Age         177
SibSp         0
Parch         0
Ticket        0
Fare          0
Cabin       687
Embarked      0
dtype: int64

### 2) Fare

In [13]:
test[test["Fare"].isnull()]

Unnamed: 0_level_0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1044,3,"Storey, Mr. Thomas",male,60.5,0,0,3701,,,S


In [14]:
# 3등석이고 탑승지가 S인 요금 평균
mean_fare = all_data[(all_data["Pclass"] == 3) & (all_data["Embarked"] == "S")]["Fare"].mean()
test["Fare"] = test["Fare"].fillna(mean_fare)
test.isnull().sum()

Pclass        0
Name          0
Sex           0
Age          86
SibSp         0
Parch         0
Ticket        0
Fare          0
Cabin       327
Embarked      0
dtype: int64

### 3) Age

In [15]:
all_mean_age = all_data["Age"].mean()
print(all_mean_age)

train["Age"] = train["Age"].fillna(all_data["Age"].mean())
test["Age"] = test["Age"].fillna(all_data["Age"].mean())

# 좌석등급, 성별에 따른 연령층이 차이가 있음.. 이거 전체 루프 돌지 않고 어케 넣어주지??
ages = all_data.groupby(['Pclass', 'Sex'])['Age'].mean()
print(ages)

29.881137667304014
Pclass  Sex   
1       female    37.037594
        male      41.029272
2       female    27.499223
        male      30.815380
3       female    22.185329
        male      25.962264
Name: Age, dtype: float64


In [16]:
train.isnull().sum()

Survived      0
Pclass        0
Name          0
Sex           0
Age           0
SibSp         0
Parch         0
Ticket        0
Fare          0
Cabin       687
Embarked      0
dtype: int64

In [17]:
test.isnull().sum()

Pclass        0
Name          0
Sex           0
Age           0
SibSp         0
Parch         0
Ticket        0
Fare          0
Cabin       327
Embarked      0
dtype: int64

## Encoding

In [0]:
def create_dummies(df,column_name):
    dummies = pd.get_dummies(df[column_name],prefix=column_name)
    df = pd.concat([df,dummies],axis=1)
    return df

train = create_dummies(train,"Pclass")
train = create_dummies(train,"Embarked")

In [0]:
test = create_dummies(test,"Pclass")
test = create_dummies(test,"Embarked")

In [20]:
# 성별을 feature 로 사용하기위해 수치형으로 encoding
train.loc[train["Sex"] == "male", "Enc_Sex"] = 0
train.loc[train["Sex"] == "female", "Enc_Sex"] = 1
train["Enc_Sex"] = train["Enc_Sex"].astype(int)

print(train.shape)
train.head()

(891, 18)


Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S,Enc_Sex
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,0,0,1,0,0,1,0
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,1,0,0,1,0,0,1
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0,0,1,0,0,1,1
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,1,0,0,0,0,1,1
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0,0,1,0,0,1,0


In [21]:
# test 데이터를 가지고 predict 를 해야 하기 때문에 train data 와 동일하게 전처리 해야 함
test.loc[test["Sex"] == "male", "Enc_Sex"] = 0
test.loc[test["Sex"] == "female", "Enc_Sex"] = 1
test["Enc_Sex"] = test["Enc_Sex"].astype(int)

print(test.shape)
test.head()

(418, 17)


Unnamed: 0_level_0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S,Enc_Sex
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q,0,0,1,0,1,0,0
893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S,0,0,1,0,0,1,1
894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q,0,1,0,0,1,0,0
895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S,0,0,1,0,0,1,0
896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S,0,0,1,0,0,1,1


In [22]:
train["Family"] = train["SibSp"] + train["Parch"]
train.head()

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S,Enc_Sex,Family
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,0,0,1,0,0,1,0,1
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,1,0,0,1,0,0,1,1
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0,0,1,0,0,1,1,0
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,1,0,0,0,0,1,1,1
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0,0,1,0,0,1,0,0


In [23]:
test["Family"] = test["SibSp"] + test["Parch"]
test.head()

Unnamed: 0_level_0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S,Enc_Sex,Family
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q,0,0,1,0,1,0,0,0
893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S,0,0,1,0,0,1,1,1
894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q,0,1,0,0,1,0,0,0
895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S,0,0,1,0,0,1,0,0
896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S,0,0,1,0,0,1,1,2


## Train

In [24]:
#feature_names = ["Pclass", "Enc_Sex", "Age", "SibSp", "Parch", "Fare", "Embarked_C", "Embarked_Q", "Embarked_S"]
# 왜 나는 Pclass를 one hot encoding 해야 더 점수가 잘 나오는지 모르겠음
feature_names = ["Pclass_1", "Pclass_2", "Pclass_3", "Enc_Sex", "Age", "Family", "Fare", "Embarked_C", "Embarked_Q", "Embarked_S"]
#feature_names = ["Pclass", "Enc_Sex", "SibSp", "Parch", "Fare", "Embarked_C", "Embarked_Q", "Embarked_S"]

X = train[feature_names].values

y = train['Survived'].values
y = y.astype(float).reshape(-1, 1)

print(X)

[[0. 0. 1. ... 0. 0. 1.]
 [1. 0. 0. ... 1. 0. 0.]
 [0. 0. 1. ... 0. 0. 1.]
 ...
 [0. 0. 1. ... 0. 0. 1.]
 [1. 0. 0. ... 1. 0. 0.]
 [0. 0. 1. ... 0. 1. 0.]]


In [0]:
# X_train = train[feature_names].values
# X_train = X_train.astype(float)
# y_train = train['Survived'].values
# y_train = y_train.astype(float).reshape(-1, 1)
# print(X_train.shape, y_train.shape)

from sklearn.model_selection import train_test_split

X_train, X_dev, y_train, y_dev = train_test_split(X, y, test_size=0.1, random_state=0)

In [26]:
print(X_train.shape, y_train.shape)
print(X_dev.shape, y_dev.shape)

(801, 10) (801, 1)
(90, 10) (90, 1)


In [27]:
X_test = test[feature_names].values
X_test = X_test.astype(float)

print(X_test)

[[0. 0. 1. ... 0. 1. 0.]
 [0. 0. 1. ... 0. 0. 1.]
 [0. 1. 0. ... 0. 1. 0.]
 ...
 [0. 0. 1. ... 0. 0. 1.]
 [0. 0. 1. ... 0. 0. 1.]
 [0. 0. 1. ... 1. 0. 0.]]


In [28]:
id_test = test.index.get_values()
id_test = id_test.reshape(-1, 1)
print(id_test)

[[ 892]
 [ 893]
 [ 894]
 [ 895]
 [ 896]
 [ 897]
 [ 898]
 [ 899]
 [ 900]
 [ 901]
 [ 902]
 [ 903]
 [ 904]
 [ 905]
 [ 906]
 [ 907]
 [ 908]
 [ 909]
 [ 910]
 [ 911]
 [ 912]
 [ 913]
 [ 914]
 [ 915]
 [ 916]
 [ 917]
 [ 918]
 [ 919]
 [ 920]
 [ 921]
 [ 922]
 [ 923]
 [ 924]
 [ 925]
 [ 926]
 [ 927]
 [ 928]
 [ 929]
 [ 930]
 [ 931]
 [ 932]
 [ 933]
 [ 934]
 [ 935]
 [ 936]
 [ 937]
 [ 938]
 [ 939]
 [ 940]
 [ 941]
 [ 942]
 [ 943]
 [ 944]
 [ 945]
 [ 946]
 [ 947]
 [ 948]
 [ 949]
 [ 950]
 [ 951]
 [ 952]
 [ 953]
 [ 954]
 [ 955]
 [ 956]
 [ 957]
 [ 958]
 [ 959]
 [ 960]
 [ 961]
 [ 962]
 [ 963]
 [ 964]
 [ 965]
 [ 966]
 [ 967]
 [ 968]
 [ 969]
 [ 970]
 [ 971]
 [ 972]
 [ 973]
 [ 974]
 [ 975]
 [ 976]
 [ 977]
 [ 978]
 [ 979]
 [ 980]
 [ 981]
 [ 982]
 [ 983]
 [ 984]
 [ 985]
 [ 986]
 [ 987]
 [ 988]
 [ 989]
 [ 990]
 [ 991]
 [ 992]
 [ 993]
 [ 994]
 [ 995]
 [ 996]
 [ 997]
 [ 998]
 [ 999]
 [1000]
 [1001]
 [1002]
 [1003]
 [1004]
 [1005]
 [1006]
 [1007]
 [1008]
 [1009]
 [1010]
 [1011]
 [1012]
 [1013]
 [1014]
 [1015]
 [1016]


In [29]:
print(X_test.shape, id_test.shape)

(418, 10) (418, 1)


In [0]:
seed = 37
input_size = X_train.shape[1] # number of features

# kaggle score => 0.76076
learning_rate = 0.001 # most common value for Adam
epochs = 8500

# kaggle score => 0.75119
# learning_rate = 0.01
# epochs = 18000

In [0]:
graph = tf.Graph()
with graph.as_default():
    tf.set_random_seed(seed)
    np.random.seed(seed)

    X_input = tf.placeholder(dtype=tf.float32, shape=[None, input_size], name='X_input')
    y_input = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='y_input')
    
    W1 = tf.Variable(tf.random_normal(shape=[input_size, 1], seed=seed), name='W1')
    b1 = tf.Variable(tf.random_normal(shape=[1], seed=seed), name='b1')
    sigm = tf.nn.sigmoid(tf.add(tf.matmul(X_input, W1), b1), name='pred')
    
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_input, logits=sigm, name='loss'))
    #train_steps = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    # GradientDescentOptimizer보다 AdamOptimizer이 더 점수 높게나옴
    train_steps = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    pred = tf.cast(tf.greater_equal(sigm, 0.5), tf.float32, name='pred') # 1 if >= 0.5
    acc = tf.reduce_mean(tf.cast(tf.equal(pred, y_input), tf.float32), name='acc')
    
    init_var = tf.global_variables_initializer()

In [0]:
train_feed_dict = {X_input: X_train, y_input: y_train}
dev_feed_dict = {X_input: X_dev, y_input: y_dev}
test_feed_dict = {X_input: X_test} # no y_input since the goal is to predict it

In [33]:
sess = tf.Session(graph=graph)
sess.run(init_var)

cur_loss = sess.run(loss, feed_dict=train_feed_dict)
train_acc = sess.run(acc, feed_dict=train_feed_dict)
test_acc = sess.run(acc, feed_dict=dev_feed_dict)
print('step 0: loss {0:.5f}, train_acc {1:.2f}%, test_acc {2:.2f}%'.format(cur_loss, 100*train_acc, 100*test_acc))

for step in range(1, epochs+1):
    sess.run(train_steps, feed_dict=train_feed_dict)
    cur_loss = sess.run(loss, feed_dict=train_feed_dict)
    train_acc = sess.run(acc, feed_dict=train_feed_dict)
    test_acc = sess.run(acc, feed_dict=dev_feed_dict)
    
    if step%100 != 0: # print result every 100 steps
        continue
    print('step {3}: loss {0:.5f}, train_acc {1:.2f}%, test_acc {2:.2f}%'.format(cur_loss, 100*train_acc, 100*test_acc, step))

step 0: loss 0.68755, train_acc 63.80%, test_acc 56.67%
step 100: loss 0.68692, train_acc 63.92%, test_acc 57.78%
step 200: loss 0.68660, train_acc 63.92%, test_acc 57.78%
step 300: loss 0.68622, train_acc 64.04%, test_acc 57.78%
step 400: loss 0.68578, train_acc 64.42%, test_acc 58.89%
step 500: loss 0.68524, train_acc 64.54%, test_acc 58.89%
step 600: loss 0.68448, train_acc 64.79%, test_acc 58.89%
step 700: loss 0.68333, train_acc 65.29%, test_acc 58.89%
step 800: loss 0.68201, train_acc 66.04%, test_acc 61.11%
step 900: loss 0.68080, train_acc 66.29%, test_acc 61.11%
step 1000: loss 0.67921, train_acc 66.92%, test_acc 63.33%
step 1100: loss 0.66868, train_acc 70.66%, test_acc 70.00%
step 1200: loss 0.66732, train_acc 70.79%, test_acc 71.11%
step 1300: loss 0.66657, train_acc 71.04%, test_acc 71.11%
step 1400: loss 0.66598, train_acc 71.04%, test_acc 71.11%
step 1500: loss 0.66545, train_acc 71.41%, test_acc 71.11%
step 1600: loss 0.66493, train_acc 71.54%, test_acc 71.11%
step 1700

In [0]:
y_pred = sess.run(pred, feed_dict=test_feed_dict).astype(int)
prediction = pd.DataFrame(np.concatenate([id_test, y_pred], axis=1), columns=['PassengerId', 'Survived'])
prediction = prediction.set_index("PassengerId")

In [35]:
prediction.head()

Unnamed: 0_level_0,Survived
PassengerId,Unnamed: 1_level_1
892,0
893,0
894,0
895,0
896,0


In [0]:
from google.colab import files

file_name = "titanic-tensorflow_submission.csv"
prediction.to_csv(file_name)
files.download(file_name)