In [1]:
import re
import pandas as pd
import numpy as np
import tensorflow as tf
import keras
from keras.layers import Normalization
from tensorflow.keras.layers import IntegerLookup, StringLookup, CategoryEncoding
from tensorflow.keras import regularizers


In [2]:
train_df = pd.read_csv("../input/spaceship-titanic/train.csv")
test_df = pd.read_csv("../input/spaceship-titanic/test.csv")
train_df.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True


In [3]:
train_df = train_df.drop(['PassengerId', 'Name'], axis = 1)
submission_pid = test_df['PassengerId']

test_df = test_df.drop(['PassengerId', 'Name'], axis = 1)

train_df.isna().sum()

HomePlanet      201
CryoSleep       217
Cabin           199
Destination     182
Age             179
VIP             203
RoomService     181
FoodCourt       183
ShoppingMall    208
Spa             183
VRDeck          188
Transported       0
dtype: int64

In [4]:
num_cols = ['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']
cat_cols = ['HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'VIP']

for i in num_cols:
    train_df[i] = train_df[i].fillna(train_df[i].mean())
    test_df[i] = test_df[i].fillna(test_df[i].mean())

for i in cat_cols:
    train_df[i] = train_df[i].fillna(train_df[i].value_counts().index[0])
    test_df[i] = test_df[i].fillna(test_df[i].value_counts().index[0])
    
train_df.isnull().sum()

HomePlanet      0
CryoSleep       0
Cabin           0
Destination     0
Age             0
VIP             0
RoomService     0
FoodCourt       0
ShoppingMall    0
Spa             0
VRDeck          0
Transported     0
dtype: int64

In [5]:
train_cabin_re = train_df['Cabin'].str.extract(r"(^.*)\/(.*)\/(.*)")
test_cabin_re = test_df['Cabin'].str.extract(r"(^.*)\/(.*)\/(.*)")

train_df['Cabin'] = train_cabin_re[0] + train_cabin_re[2]
train_df['CabinNum'] = train_cabin_re[1].astype('float64')

test_df['Cabin'] = test_cabin_re[0] + test_cabin_re[2]
test_df['CabinNum'] = test_cabin_re[1].astype('float64')

print(train_df['CabinNum'])
train_df['Cabin']

0          0.0
1          0.0
2          0.0
3          0.0
4          1.0
         ...  
8688      98.0
8689    1499.0
8690    1500.0
8691     608.0
8692     608.0
Name: CabinNum, Length: 8693, dtype: float64


0       BP
1       FS
2       AS
3       AS
4       FS
        ..
8688    AP
8689    GS
8690    GS
8691    ES
8692    ES
Name: Cabin, Length: 8693, dtype: object

In [6]:
train_df.loc[:,'CryoSleep'] = train_df.loc[:,'CryoSleep'].apply(lambda x: 0 if x == False else 1)
test_df.loc[:,'CryoSleep'] = test_df.loc[:,'CryoSleep'].apply(lambda x: 0 if x == False else 1)
train_df['CryoSleep'].unique()

array([0, 1])

In [7]:
train_df.loc[:,'VIP'] = train_df.loc[:,'VIP'].apply(lambda x: 0 if x == False else 1)
test_df.loc[:,'VIP'] = test_df.loc[:,'VIP'].apply(lambda x: 0 if x == False else 1)
train_df['VIP'].unique()

array([0, 1])

In [8]:
train_df.loc[:,'Transported'] = train_df.loc[:,'Transported'].apply(lambda x: 0 if x == False else 1)
train_df

Unnamed: 0,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,CabinNum
0,Europa,0,BP,TRAPPIST-1e,39.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0
1,Earth,0,FS,TRAPPIST-1e,24.0,0,109.0,9.0,25.0,549.0,44.0,1,0.0
2,Europa,0,AS,TRAPPIST-1e,58.0,1,43.0,3576.0,0.0,6715.0,49.0,0,0.0
3,Europa,0,AS,TRAPPIST-1e,33.0,0,0.0,1283.0,371.0,3329.0,193.0,0,0.0
4,Earth,0,FS,TRAPPIST-1e,16.0,0,303.0,70.0,151.0,565.0,2.0,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,Europa,0,AP,55 Cancri e,41.0,1,0.0,6819.0,0.0,1643.0,74.0,0,98.0
8689,Earth,1,GS,PSO J318.5-22,18.0,0,0.0,0.0,0.0,0.0,0.0,0,1499.0
8690,Earth,0,GS,TRAPPIST-1e,26.0,0,0.0,0.0,1872.0,1.0,0.0,1,1500.0
8691,Europa,0,ES,55 Cancri e,32.0,0,0.0,1049.0,0.0,353.0,3235.0,0,608.0


In [9]:
train_df.loc[:,'Destination'] = train_df['Destination'].apply(lambda x: 0 if x=='TRAPPIST-1e' else (1 if x=='PSO J318.5-22' else 2))
test_df.loc[:,'Destination'] = test_df['Destination'].apply(lambda x: 0 if x=='TRAPPIST-1e' else (1 if x=='PSO J318.5-22' else 2))
train_df

Unnamed: 0,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,CabinNum
0,Europa,0,BP,0,39.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0
1,Earth,0,FS,0,24.0,0,109.0,9.0,25.0,549.0,44.0,1,0.0
2,Europa,0,AS,0,58.0,1,43.0,3576.0,0.0,6715.0,49.0,0,0.0
3,Europa,0,AS,0,33.0,0,0.0,1283.0,371.0,3329.0,193.0,0,0.0
4,Earth,0,FS,0,16.0,0,303.0,70.0,151.0,565.0,2.0,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,Europa,0,AP,2,41.0,1,0.0,6819.0,0.0,1643.0,74.0,0,98.0
8689,Earth,1,GS,1,18.0,0,0.0,0.0,0.0,0.0,0.0,0,1499.0
8690,Earth,0,GS,0,26.0,0,0.0,0.0,1872.0,1.0,0.0,1,1500.0
8691,Europa,0,ES,2,32.0,0,0.0,1049.0,0.0,353.0,3235.0,0,608.0


In [10]:
train_df.loc[:,'HomePlanet'] = train_df['HomePlanet'].apply(lambda x: 0 if x=='Europa' else (1 if x=='Earth' else 2))
test_df.loc[:,'HomePlanet'] = test_df['HomePlanet'].apply(lambda x: 0 if x=='Europa' else (1 if x=='Earth' else 2))
train_df

Unnamed: 0,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,CabinNum
0,0,0,BP,0,39.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0
1,1,0,FS,0,24.0,0,109.0,9.0,25.0,549.0,44.0,1,0.0
2,0,0,AS,0,58.0,1,43.0,3576.0,0.0,6715.0,49.0,0,0.0
3,0,0,AS,0,33.0,0,0.0,1283.0,371.0,3329.0,193.0,0,0.0
4,1,0,FS,0,16.0,0,303.0,70.0,151.0,565.0,2.0,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,0,0,AP,2,41.0,1,0.0,6819.0,0.0,1643.0,74.0,0,98.0
8689,1,1,GS,1,18.0,0,0.0,0.0,0.0,0.0,0.0,0,1499.0
8690,1,0,GS,0,26.0,0,0.0,0.0,1872.0,1.0,0.0,1,1500.0
8691,0,0,ES,2,32.0,0,0.0,1049.0,0.0,353.0,3235.0,0,608.0


In [11]:
val_df = train_df.sample(frac=0.15, random_state=28296)
train_df = train_df.drop(val_df.index)

train_target = train_df.pop('Transported')
val_target = val_df.pop('Transported')
train_df

Unnamed: 0,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,CabinNum
0,0,0,BP,0,39.0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,0,FS,0,24.0,0,109.0,9.0,25.0,549.0,44.0,0.0
2,0,0,AS,0,58.0,1,43.0,3576.0,0.0,6715.0,49.0,0.0
3,0,0,AS,0,33.0,0,0.0,1283.0,371.0,3329.0,193.0,0.0
4,1,0,FS,0,16.0,0,303.0,70.0,151.0,565.0,2.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
8688,0,0,AP,2,41.0,1,0.0,6819.0,0.0,1643.0,74.0,98.0
8689,1,1,GS,1,18.0,0,0.0,0.0,0.0,0.0,0.0,1499.0
8690,1,0,GS,0,26.0,0,0.0,0.0,1872.0,1.0,0.0,1500.0
8691,0,0,ES,2,32.0,0,0.0,1049.0,0.0,353.0,3235.0,608.0


In [12]:
train_ds = tf.data.Dataset.from_tensor_slices((dict(train_df), train_target))
val_ds = tf.data.Dataset.from_tensor_slices((dict(val_df), val_target))
test_ds = tf.data.Dataset.from_tensor_slices(dict(test_df))

train_ds = train_ds.shuffle(buffer_size=len(train_ds))
val_ds = val_ds.shuffle(buffer_size=len(val_ds))

train_ds = train_ds.batch(20)
val_ds = val_ds.batch(20)
test_ds = test_ds.batch(20)
train_ds

2022-09-01 02:55:29.285542: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


<BatchDataset shapes: ({HomePlanet: (None,), CryoSleep: (None,), Cabin: (None,), Destination: (None,), Age: (None,), VIP: (None,), RoomService: (None,), FoodCourt: (None,), ShoppingMall: (None,), Spa: (None,), VRDeck: (None,), CabinNum: (None,)}, (None,)), types: ({HomePlanet: tf.int64, CryoSleep: tf.int64, Cabin: tf.string, Destination: tf.int64, Age: tf.float64, VIP: tf.int64, RoomService: tf.float64, FoodCourt: tf.float64, ShoppingMall: tf.float64, Spa: tf.float64, VRDeck: tf.float64, CabinNum: tf.float64}, tf.int64)>

In [13]:
train_df

Unnamed: 0,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,CabinNum
0,0,0,BP,0,39.0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,0,FS,0,24.0,0,109.0,9.0,25.0,549.0,44.0,0.0
2,0,0,AS,0,58.0,1,43.0,3576.0,0.0,6715.0,49.0,0.0
3,0,0,AS,0,33.0,0,0.0,1283.0,371.0,3329.0,193.0,0.0
4,1,0,FS,0,16.0,0,303.0,70.0,151.0,565.0,2.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
8688,0,0,AP,2,41.0,1,0.0,6819.0,0.0,1643.0,74.0,98.0
8689,1,1,GS,1,18.0,0,0.0,0.0,0.0,0.0,0.0,1499.0
8690,1,0,GS,0,26.0,0,0.0,0.0,1872.0,1.0,0.0,1500.0
8691,0,0,ES,2,32.0,0,0.0,1049.0,0.0,353.0,3235.0,608.0


In [14]:
home_p = keras.Input(shape=(1,), name='HomePlanet', dtype='float64')
cryo = keras.Input(shape=(1,), name='CryoSleep', dtype='float64')
dest = keras.Input(shape=(1,), name='Destination', dtype='float64')
age = keras.Input(shape=(1,), name='Age', dtype='float64')
vip = keras.Input(shape=(1,), name='VIP', dtype='float64')
r_service = keras.Input(shape=(1,), name='RoomService', dtype='float64')
f_court = keras.Input(shape=(1,), name='FoodCourt', dtype='float64')
s_mall = keras.Input(shape=(1,), name='ShoppingMall', dtype='float64')
spa = keras.Input(shape=(1,), name='Spa', dtype='float64')
v_deck = keras.Input(shape=(1,), name='VRDeck', dtype='float64')
cabin = keras.Input(shape=(1,), name='Cabin', dtype='string')
cabin_num = keras.Input(shape=(1,), name='CabinNum', dtype='float64')

all_inputs = [home_p, cryo, dest, age, vip, r_service, f_court, s_mall, spa, v_deck, cabin, cabin_num]
all_inputs

[<KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'HomePlanet')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'CryoSleep')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'Destination')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'Age')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'VIP')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'RoomService')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'FoodCourt')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'ShoppingMall')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'Spa')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'VRDeck')>,
 <KerasTensor: shape=(None, 1) dtype=string (created by layer 'Cabin')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'CabinNum')>]

In [15]:
def get_feature(input_name, name, ds):
    feature = ds.map(lambda x, y: x[name])
    feature = feature.map(lambda x: tf.expand_dims(x, -1))
    
    normilizer = Normalization()
    normilizer.adapt(feature)
    feature = normilizer(input_name)
    return feature

def get_num_cat_feature(input_name, name, ds):
    feature = ds.map(lambda x, y: x[name])
    feature = feature.map(lambda x: tf.expand_dims(x, -1))
    
    lookup = IntegerLookup(output_mode="binary")
    lookup.adapt(feature)
    feature = lookup(input_name)
    return feature

def get_cat_feature(input_name, name, ds):
    feature = ds.map(lambda x, y: x[name])
    feature = feature.map(lambda x: tf.expand_dims(x, -1))
    
    lookup = StringLookup(output_mode="binary")
    lookup.adapt(feature)
    feature = lookup(input_name)
    return feature

In [16]:
train_df

Unnamed: 0,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,CabinNum
0,0,0,BP,0,39.0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,0,FS,0,24.0,0,109.0,9.0,25.0,549.0,44.0,0.0
2,0,0,AS,0,58.0,1,43.0,3576.0,0.0,6715.0,49.0,0.0
3,0,0,AS,0,33.0,0,0.0,1283.0,371.0,3329.0,193.0,0.0
4,1,0,FS,0,16.0,0,303.0,70.0,151.0,565.0,2.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
8688,0,0,AP,2,41.0,1,0.0,6819.0,0.0,1643.0,74.0,98.0
8689,1,1,GS,1,18.0,0,0.0,0.0,0.0,0.0,0.0,1499.0
8690,1,0,GS,0,26.0,0,0.0,0.0,1872.0,1.0,0.0,1500.0
8691,0,0,ES,2,32.0,0,0.0,1049.0,0.0,353.0,3235.0,608.0


In [17]:
home_p_train = get_num_cat_feature(home_p, 'HomePlanet', train_ds)
cryo_train = get_feature(cryo, 'CryoSleep', train_ds)
dest_train = get_num_cat_feature(dest, 'Destination', train_ds)
age_train = get_feature(age, 'Age', train_ds)
vip_train = get_feature(vip, 'VIP', train_ds)
r_service_train = get_feature(r_service, 'RoomService', train_ds)
f_court_train = get_feature(f_court, 'FoodCourt', train_ds)
s_mall_train = get_feature(s_mall, 'ShoppingMall', train_ds)
spa_train = get_feature(spa, 'Spa', train_ds)
v_deck_train = get_feature(v_deck, 'VRDeck', train_ds)
cabin_train = get_cat_feature(cabin, "Cabin", train_ds)
cabin_num_train = get_feature(cabin_num, 'CabinNum', train_ds)


2022-09-01 02:55:29.906398: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


In [18]:
features1 = keras.layers.concatenate([age_train, f_court_train, cabin_train, cabin_num_train, dest_train, r_service_train, v_deck_train, spa_train, home_p_train])

features2 = keras.layers.concatenate([cryo_train, s_mall_train, vip_train])

In [19]:
a1 = keras.layers.Dense(256, activation='selu')(features1)
a1 = keras.layers.Dense(4, activation='selu')(a1)

b1 = keras.layers.Dense(256, activation='selu')(features2)
b1 = keras.layers.Dense(4, activation='selu')(b1)

con = keras.layers.add([a1,b1])
con = keras.layers.Dense(4, activation='exponential')(con)

output = keras.layers.Dense(1, activation="sigmoid")(con)

model = keras.Model(all_inputs, output)
model.compile("adam", "binary_crossentropy", metrics=["binary_accuracy"])

model.fit(train_ds, epochs=128, validation_data=val_ds)
#60/65?

Epoch 1/128
Epoch 2/128
Epoch 3/128
Epoch 4/128
Epoch 5/128
Epoch 6/128
Epoch 7/128
Epoch 8/128
Epoch 9/128
Epoch 10/128
Epoch 11/128
Epoch 12/128
Epoch 13/128
Epoch 14/128
Epoch 15/128
Epoch 16/128
Epoch 17/128
Epoch 18/128
Epoch 19/128
Epoch 20/128
Epoch 21/128
Epoch 22/128
Epoch 23/128
Epoch 24/128
Epoch 25/128
Epoch 26/128
Epoch 27/128
Epoch 28/128
Epoch 29/128
Epoch 30/128
Epoch 31/128
Epoch 32/128
Epoch 33/128
Epoch 34/128
Epoch 35/128
Epoch 36/128
Epoch 37/128
Epoch 38/128
Epoch 39/128
Epoch 40/128
Epoch 41/128
Epoch 42/128
Epoch 43/128
Epoch 44/128
Epoch 45/128
Epoch 46/128
Epoch 47/128
Epoch 48/128
Epoch 49/128
Epoch 50/128
Epoch 51/128
Epoch 52/128
Epoch 53/128
Epoch 54/128
Epoch 55/128
Epoch 56/128
Epoch 57/128
Epoch 58/128
Epoch 59/128
Epoch 60/128
Epoch 61/128
Epoch 62/128
Epoch 63/128
Epoch 64/128
Epoch 65/128
Epoch 66/128
Epoch 67/128
Epoch 68/128
Epoch 69/128
Epoch 70/128
Epoch 71/128
Epoch 72/128
Epoch 73/128
Epoch 74/128
Epoch 75/128
Epoch 76/128
Epoch 77/128
Epoch 78

<keras.callbacks.History at 0x7f3c7030a0d0>

In [20]:
#tf.keras.utils.plot_model(model, show_shapes=True, rankdir="LR")

In [21]:
results = model.predict(test_ds)
prediction  = [False if i < 0.5 else True for i in results]
prediction

[True,
 False,
 True,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 True,
 True,
 True,
 False,
 True,
 True,
 False,
 True,
 True,
 False,
 True,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 True,
 False,
 True,
 True,
 False,
 True,
 False,
 False,
 True,
 False,
 True,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 True,
 False,
 True,
 True,
 False,
 True,
 True,
 False,
 True,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 True,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 False,
 True,
 True,
 True,
 False,
 True,
 False,
 True,
 True,
 False,
 True,
 True,
 False,
 False,
 True,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 

In [22]:
submission = pd.DataFrame()
submission['PassengerId']  = submission_pid
submission['Transported'] = prediction
submission.to_csv('submission.csv', index=False)
submission

Unnamed: 0,PassengerId,Transported
0,0013_01,True
1,0018_01,False
2,0019_01,True
3,0021_01,True
4,0023_01,False
...,...,...
4272,9266_02,True
4273,9269_01,False
4274,9271_01,True
4275,9273_01,True
