In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

In [2]:
# Read the training split and preview its first rows.
train = pd.read_csv('./train.csv')
train.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
# Discard the identifier / free-text columns that are not used as features.
# Rebinding `train` (rather than mutating it with inplace=True) together with
# errors="ignore" keeps this cell safe to re-run: a second execution is a
# no-op instead of a KeyError on the already-removed columns.
train = train.drop(
    columns=["Cabin", "PassengerId", "Ticket", "Name", "Embarked"],
    errors="ignore",
)

In [4]:
# Summarise the remaining columns, their dtypes and non-null counts.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Survived  891 non-null    int64  
 1   Pclass    891 non-null    int64  
 2   Sex       891 non-null    object 
 3   Age       714 non-null    float64
 4   SibSp     891 non-null    int64  
 5   Parch     891 non-null    int64  
 6   Fare      891 non-null    float64
dtypes: float64(2), int64(4), object(1)
memory usage: 48.9+ KB


In [5]:
# Replace missing ages with the mean age observed in the training data.
avg_Age = train['Age'].mean()
train = train.fillna({'Age': avg_Age})

In [6]:
# Count the remaining missing values per column.
train.isna().sum()

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
dtype: int64

In [7]:
# Separate the target column from the feature columns.
train_y = train['Survived'].to_numpy()
train_x = train.drop(columns=['Survived'])

In [8]:
# Column-wise preprocessing: one-hot encode Sex (first category dropped),
# standardise Age and Fare, and pass every other column through unchanged.
# The transformed matrix is ordered as: encoded Sex, scaled Age, scaled Fare,
# then the passthrough columns.
sex_encoder = OneHotEncoder(drop='first', handle_unknown='ignore')
numeric_scaler = StandardScaler()
ct = ColumnTransformer(
    transformers=[
        ("ohe", sex_encoder, ['Sex']),
        ("scaler", numeric_scaler, ['Age', 'Fare']),
    ],
    remainder='passthrough',
)

# Fit on the training features and keep the fitted transformer in `ct` so the
# same mapping can be applied to other data later.
train_x = ct.fit_transform(train_x)
train_x

array([[ 1.        , -0.5924806 , -0.50244517,  3.        ,  1.        ,
         0.        ],
       [ 0.        ,  0.63878901,  0.78684529,  1.        ,  1.        ,
         0.        ],
       [ 0.        , -0.2846632 , -0.48885426,  3.        ,  0.        ,
         0.        ],
       ...,
       [ 0.        ,  0.        , -0.17626324,  3.        ,  1.        ,
         2.        ],
       [ 1.        , -0.2846632 , -0.04438104,  1.        ,  0.        ,
         0.        ],
       [ 1.        ,  0.17706291, -0.49237783,  3.        ,  0.        ,
         0.        ]])

In [9]:
# Cast both arrays to float32, then report the shape and rank of each.
train_x = train_x.astype('float32')
train_y = train_y.astype('float32')
for arr in (train_x, train_y):
    print(arr.shape)
    print(arr.ndim)

(891, 6)
2
(891,)
1


In [10]:
# Turn the label vector into a single-column 2-D array so its shape lines up
# with the model's (batch, 1) output when the loss is computed.
train_y = np.reshape(train_y, (-1, 1))
print(train_y.shape, train_y.ndim, sep='\n')

(891, 1)
2


In [11]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing a NumPy feature array with a NumPy label array.

    Parameters
    ----------
    x : numpy.ndarray
        Feature array; the first axis indexes samples.
    y : numpy.ndarray
        Label array; must have the same number of samples as ``x``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not contain the same number of samples.

    Notes
    -----
    ``torch.from_numpy`` creates tensors that share memory with the arrays
    passed in, so modifying those arrays afterwards also changes the dataset.
    """

    def __init__(self, x, y):
        # Fail early: a length mismatch would otherwise only surface as an
        # IndexError mid-epoch, or silently ignore trailing labels.
        if len(x) != len(y):
            raise ValueError(
                f"x and y must have the same number of samples, got {len(x)} and {len(y)}"
            )
        self.X = torch.from_numpy(x)
        self.Y = torch.from_numpy(y)

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, item):
        """Return the ``(features, label)`` pair stored at index ``item``."""
        return self.X[item], self.Y[item]

In [12]:
# Wrap the training arrays in a shuffling mini-batch loader (16 samples per batch).
# NOTE: despite its name, `train_dataset` ends up holding the DataLoader.
train_dataset = DataLoader(
    Dataset(train_x, train_y),
    batch_size=16,
    shuffle=True,
)

In [13]:
class Pikachu(torch.nn.Module):
    """Fully connected binary classifier: n_inputs -> 200 -> 300 -> 1.

    Both hidden layers use ReLU; the final layer is followed by a sigmoid, so
    the output for each sample is a single value in (0, 1).
    """

    def __init__(self, n_inputs):
        """Build the layer stack for feature vectors of length ``n_inputs``."""
        super().__init__()

        hidden_widths = (200, 300)
        layers = []
        in_features = n_inputs
        # Hidden layers: Linear followed by ReLU, in order.
        for width in hidden_widths:
            layers += [nn.Linear(in_features, width), nn.ReLU()]
            in_features = width
        # Output head: a single unit squashed into (0, 1).
        layers += [nn.Linear(in_features, 1), nn.Sigmoid()]

        self.block = nn.Sequential(*layers)

    def forward(self, X):
        """Run ``X`` through the layer stack and return the sigmoid outputs."""
        return self.block(X)

In [14]:
# Seed PyTorch's global RNG so the weight initialisation below is repeatable
# on a fresh Restart & Run All. Later draws from the same global RNG
# (e.g. a shuffling DataLoader without its own generator) become repeatable
# too, as long as the cells are executed in order.
torch.manual_seed(42)

# The network's input width is the number of columns in the feature matrix.
model = Pikachu(train_x.shape[1])


In [15]:
# Optimisation settings for the training run.
NUM_EPOCHS = 1000
LEARNING_RATE = 1e-5

# Binary cross-entropy on the model's sigmoid outputs, optimised with Adam.
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(NUM_EPOCHS):
    # One pass over the mini-batches produced by the loader.
    for X, Y in train_dataset:
        optimizer.zero_grad()      # clear gradients left from the previous step
        y_pred = model(X)
        loss = loss_fn(y_pred, Y)
        loss.backward()
        optimizer.step()

In [16]:
# Load the held-out split and keep the same columns that were kept for training.
test_raw = pd.read_csv('./test.csv')
test_features = test_raw.drop(
    columns=["Cabin", "PassengerId", "Ticket", "Name", "Embarked"]
)

# Impute with statistics taken from the *training* frame, never from the test
# data itself: preprocessing parameters must be learned on training data only.
# Fare is imputed as well as Age because StandardScaler passes NaN through
# unchanged; a single missing Fare would become a NaN prediction that the
# later `> 0.5` threshold silently maps to 0.
# `train` is still the cleaned training DataFrame at this point.
test_features = test_features.fillna(
    {'Age': train['Age'].mean(), 'Fare': train['Fare'].mean()}
)
assert not test_features.isna().any().any(), "test features still contain missing values"

# Apply the transformer fitted on the training features, then hand the result
# to PyTorch as a float32 tensor.
test = torch.from_numpy(ct.transform(test_features).astype('float32'))

In [17]:
# Switch the module to evaluation mode before predicting. This network has no
# dropout or batch-norm layers, so the outputs are unchanged today, but it
# keeps inference correct if such layers are ever added to the model.
model.eval()

# Gradients are not needed for prediction; skipping them saves memory and
# lets the result be converted to NumPy directly.
with torch.no_grad():
    Y_predict = model(test)

In [18]:
# Convert the (n, 1) tensor of probabilities to a flat 0/1 label vector:
# probabilities strictly above 0.5 become 1, everything else becomes 0.
Y_predict = np.where(Y_predict.numpy()[:, 0] > 0.5, 1, 0)

In [19]:
# Use the sample submission file as a template: overwrite its Survived column
# with the model's predictions and write the result without the index.
submission = pd.read_csv('./gender_submission.csv').assign(Survived=Y_predict)
submission.to_csv('submission.csv', index=False)