In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
 
import pandas as pd
import numpy as np
import copy

In [2]:
# Cap rendered DataFrames at 50 rows and 50 columns.
pd.options.display.max_rows = 50
pd.options.display.max_columns = 50

In [3]:
# Read the three competition CSV files from the local data/ directory.
df_train, df_test, df_sample_submission = map(
    pd.read_csv,
    ('data/train.csv', 'data/test.csv', 'data/sample_submission.csv'),
)

In [4]:
def preprocess(df):
    """Convert the raw passenger table into an all-numeric feature frame.

    Reads the columns PassengerId, HomePlanet, CryoSleep, Cabin, Destination,
    Age, VIP, RoomService, FoodCourt, Spa and VRDeck from ``df`` and returns a
    new DataFrame (``df`` is not modified) whose columns are, in order:

    Group, Nr, one indicator per HomePlanet value, CryoSleep, Cabin, Side,
    ``Deck_*`` indicators, ``Destination_*`` indicators, Age, VIP,
    RoomService, FoodCourt, Spa, VRDeck.

    Numeric features are min-max scaled to [0, 1]; missing values are filled
    with 0 (0.5 for Side) before scaling.

    NOTE(review): scaling uses the min/max of the frame passed in, and the
    indicator columns are whatever categories occur in that frame, so two
    different frames (e.g. train and test) are scaled independently and may
    yield different column sets.
    """
    def normalize(s):
        # Min-max scale to [0, 1]. A constant (or empty / all-NaN) column has
        # no range; return zeros rather than dividing by zero, which would
        # turn the whole feature into NaN.
        lo, hi = s.min(), s.max()
        span = hi - lo
        if not span > 0:
            return pd.Series(0.0, index=s.index)
        return (s - lo) / span

    dff = pd.DataFrame()

    # PassengerId is "gggg_nn": first four characters are the group, the
    # characters after the separator are the number within the group.
    dff['Group'] = normalize(pd.to_numeric(df['PassengerId'].str[:4]))
    dff['Nr'] = normalize(pd.to_numeric(df['PassengerId'].str[5:]))

    # One-hot encode the home planet (columns are named after the raw values).
    dff = pd.concat([dff, pd.get_dummies(df['HomePlanet'])], axis=1)

    # CryoSleep as 0/1; missing values count as 0.
    dff['CryoSleep'] = df['CryoSleep'].map({False: 0, True: 1}).fillna(0).astype(np.uint8)

    # Cabin is "Deck/Number/Side". Reindexing guarantees exactly three parts
    # even when no row in this frame has a complete cabin string.
    cabin_parts = df['Cabin'].str.split('/', expand=True).reindex(columns=[0, 1, 2])
    cabin_parts.columns = ['Deck', 'Cabin', 'Side']

    # Cabin number: missing -> 0, then scaled.
    dff['Cabin'] = normalize(cabin_parts['Cabin'].astype(float).fillna(0))

    # Side: P=0, S=1, unknown=0.5.
    dff['Side'] = cabin_parts['Side'].map({'P': 0, 'S': 1}).fillna(0.5).astype(np.float64)

    # One-hot encode deck and destination.
    dff = pd.concat([dff, pd.get_dummies(cabin_parts['Deck'], prefix='Deck')], axis=1)
    dff = pd.concat([dff, pd.get_dummies(df['Destination'], prefix='Destination')], axis=1)

    # Age: missing -> 0, truncated to whole years, then scaled.
    dff['Age'] = normalize(df['Age'].fillna(0).astype(int))

    # VIP as 0/1; missing values count as 0.
    dff['VIP'] = df['VIP'].map({False: 0, True: 1}).fillna(0).astype(np.uint8)

    # Spending columns: missing -> 0, then scaled.
    for col in ('RoomService', 'FoodCourt', 'Spa', 'VRDeck'):
        dff[col] = normalize(df[col].fillna(0).astype(float))

    return dff



In [5]:
# Feature matrix and 0/1 label column for the labelled data.
X = preprocess(df_train)
y = (
    df_train['Transported']
    .map({False: 0, True: 1})
    .fillna(False)
    .astype(np.uint8)
    .to_frame()
)

# Hold out the trailing 10% of rows (taken in file order, no shuffling)
# as the validation split.
test_size = 0.1
split_at = int(X.shape[0] * (1 - test_size))

X_train, X_val = X.iloc[:split_at], X.iloc[split_at:]
y_train, y_val = y.iloc[:split_at], y.iloc[split_at:]

In [6]:
class Deep(nn.Module):
    """Fully connected binary classifier: Linear+ReLU stack with a sigmoid output.

    Args:
        in_features: width of the input feature vector (default 25).
        hidden_size: width of every hidden layer (default 50).
        n_hidden: number of Linear+ReLU pairs (default 9).

    Sub-modules are registered as ``layer1``/``act1`` ... ``layerN``/``actN``
    followed by ``output`` and ``sigmoid``, so with the default arguments the
    module names, their order and the ``state_dict`` keys match the original
    hand-written nine-layer definition.

    ``forward`` maps a ``(batch, in_features)`` float tensor to a
    ``(batch, 1)`` tensor of probabilities in [0, 1].
    """

    def __init__(self, in_features=25, hidden_size=50, n_hidden=9):
        super().__init__()
        self.n_hidden = n_hidden

        # Register the hidden stack under the same attribute names the
        # explicit definition used; only the first layer sees in_features.
        width = in_features
        for i in range(1, n_hidden + 1):
            setattr(self, f'layer{i}', nn.Linear(width, hidden_size))
            setattr(self, f'act{i}', nn.ReLU())
            width = hidden_size

        self.output = nn.Linear(width, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        for i in range(1, self.n_hidden + 1):
            x = getattr(self, f'act{i}')(getattr(self, f'layer{i}')(x))
        return self.sigmoid(self.output(x))

model = Deep()
# Everything runs on the CPU. The CUDA auto-detection that used to precede this
# line was unconditionally overwritten by it (a dead store), so it is removed.
device = 'cpu'
model.to(device)

Deep(
  (layer1): Linear(in_features=25, out_features=50, bias=True)
  (act1): ReLU()
  (layer2): Linear(in_features=50, out_features=50, bias=True)
  (act2): ReLU()
  (layer3): Linear(in_features=50, out_features=50, bias=True)
  (act3): ReLU()
  (layer4): Linear(in_features=50, out_features=50, bias=True)
  (act4): ReLU()
  (layer5): Linear(in_features=50, out_features=50, bias=True)
  (act5): ReLU()
  (layer6): Linear(in_features=50, out_features=50, bias=True)
  (act6): ReLU()
  (layer7): Linear(in_features=50, out_features=50, bias=True)
  (act7): ReLU()
  (layer8): Linear(in_features=50, out_features=50, bias=True)
  (act8): ReLU()
  (layer9): Linear(in_features=50, out_features=50, bias=True)
  (act9): ReLU()
  (output): Linear(in_features=50, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [7]:
def model_train(model, X_train, y_train, X_val, y_val,
                n_epochs=1000, batch_size=100, lr=0.001):
    """Train ``model`` with Adam + binary cross-entropy and keep the best epoch.

    After every epoch the model is scored on ``(X_val, y_val)``; the weights of
    the epoch with the highest validation accuracy are loaded back into
    ``model`` before the function exits. This also happens when training is
    cut short by an exception such as ``KeyboardInterrupt`` (the exception
    still propagates).

    Args:
        model: module whose output is a probability per row (fed to BCELoss).
        X_train, y_train: training features / targets, sliceable along dim 0.
        X_val, y_val: validation features / targets.
        n_epochs: number of passes over the training data.
        batch_size: rows per optimisation step; batches are taken in order,
            without shuffling.
        lr: Adam learning rate.

    Returns:
        The best validation accuracy as a float, or ``-inf`` if no epoch was
        evaluated (e.g. ``n_epochs=0``), in which case the weights are left
        untouched.
    """
    loss_fn = nn.BCELoss()  # binary cross entropy
    optimizer = optim.Adam(model.parameters(), lr=lr)

    batch_start = range(0, len(X_train), batch_size)

    # Best validation result seen so far.
    best_acc = float("-inf")
    best_weights = None

    try:
        for epoch in range(n_epochs):
            model.train()

            with tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
                bar.set_description(f"Epoch {epoch}")

                for start in bar:
                    X_batch = X_train[start:start + batch_size]
                    y_batch = y_train[start:start + batch_size]

                    # forward pass
                    y_pred = model(X_batch)
                    loss = loss_fn(y_pred, y_batch)

                    # backward pass and weight update
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    # progress-bar statistics for this batch
                    acc = (y_pred.round() == y_batch).float().mean()
                    bar.set_postfix(loss=float(loss), acc=float(acc))

            # Score the epoch on the validation split; no autograd graph is
            # needed for this forward pass.
            model.eval()
            with torch.no_grad():
                y_pred = model(X_val)
            acc = float((y_pred.round() == y_val).float().mean())
            if acc > best_acc:
                best_acc = acc
                best_weights = copy.deepcopy(model.state_dict())
    finally:
        # Restore the best snapshot even if training was interrupted; skip
        # when no epoch finished, since there is nothing to restore.
        if best_weights is not None:
            model.load_state_dict(best_weights)

    return best_acc

In [8]:
# Materialise each split as a float32 tensor created directly on `device`.
X_train_t = torch.tensor(X_train.to_numpy().astype(np.float32), device=device)
y_train_t = torch.tensor(y_train.to_numpy().astype(np.float32), device=device)

X_val_t = torch.tensor(X_val.to_numpy().astype(np.float32), device=device)
y_val_t = torch.tensor(y_val.to_numpy().astype(np.float32), device=device)

In [9]:
# Fit the network; the validation split decides which epoch's weights are kept.
model_train(
    model,
    X_train=X_train_t,
    y_train=y_train_t,
    X_val=X_val_t,
    y_val=y_val_t,
)

Epoch 0: 100%|██████████| 79/79 [00:00<00:00, 173.62batch/s, acc=0.739, loss=0.569]
Epoch 1: 100%|██████████| 79/79 [00:00<00:00, 185.89batch/s, acc=0.783, loss=0.467]
Epoch 2: 100%|██████████| 79/79 [00:00<00:00, 191.67batch/s, acc=0.783, loss=0.445]
Epoch 3: 100%|██████████| 79/79 [00:00<00:00, 175.65batch/s, acc=0.783, loss=0.418]
Epoch 4: 100%|██████████| 79/79 [00:00<00:00, 182.79batch/s, acc=0.826, loss=0.393]
Epoch 5: 100%|██████████| 79/79 [00:00<00:00, 188.26batch/s, acc=0.826, loss=0.366]
Epoch 6: 100%|██████████| 79/79 [00:00<00:00, 175.47batch/s, acc=0.826, loss=0.349]
Epoch 7: 100%|██████████| 79/79 [00:00<00:00, 186.35batch/s, acc=0.826, loss=0.338]
Epoch 8: 100%|██████████| 79/79 [00:00<00:00, 187.42batch/s, acc=0.826, loss=0.333]
Epoch 9: 100%|██████████| 79/79 [00:00<00:00, 184.95batch/s, acc=0.826, loss=0.33]
Epoch 10: 100%|██████████| 79/79 [00:00<00:00, 180.02batch/s, acc=0.783, loss=0.334]
Epoch 11: 100%|██████████| 79/79 [00:00<00:00, 180.67batch/s, acc=0.826, los

KeyboardInterrupt: 

In [10]:
# Display the device the model and the tensors were moved to.
device

'cpu'

In [38]:
# Test-set features get their own name so the training tensor `X_train_t`
# is not overwritten when this cell runs.
# NOTE(review): preprocess(df_test) must yield the same columns, in the same
# order, as the frame the model was trained on — verify the column count
# matches the model's input width.
X_test_t = torch.tensor(preprocess(df_test).values.astype(np.float32)).to(device)

model.eval()
with torch.no_grad():  # inference only: no autograd graph is needed
    pred_t = model(X_test_t)

# .cpu() keeps the NumPy conversion valid whatever device the model lives on.
pred = pd.DataFrame(pred_t.cpu().numpy(), columns=['Transported'])
# Probabilities strictly above 0.5 are predicted as transported.
pred['Transported'] = pred['Transported'] > 0.5

result = pd.concat([df_test['PassengerId'], pred], axis=1)
result.to_csv('prediction.csv', index=False)
result

Unnamed: 0,PassengerId,Transported
0,0013_01,True
1,0018_01,False
2,0019_01,True
3,0021_01,True
4,0023_01,True
...,...,...
4272,9266_02,True
4273,9269_01,True
4274,9271_01,True
4275,9273_01,True
