In [24]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt 
import tensorflow as tf 
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.impute import SimpleImputer

In [25]:
df = pd.read_csv("dataset/train.csv")
df.sample(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
316,317,1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",female,24.0,1,0,244367,26.0,,S
24,25,0,3,"Palsson, Miss. Torborg Danira",female,8.0,3,1,349909,21.075,,S
250,251,0,3,"Reed, Mr. James George",male,,0,0,362316,7.25,,S
687,688,0,3,"Dakic, Mr. Branko",male,19.0,0,0,349228,10.1708,,S
729,730,0,3,"Ilmakangas, Miss. Pieta Sofia",female,25.0,1,0,STON/O2. 3101271,7.925,,S


In [26]:
df.duplicated().sum()

0

In [27]:
df.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [28]:
df.isnull().sum() / df.shape[0] * 100

PassengerId     0.000000
Survived        0.000000
Pclass          0.000000
Name            0.000000
Sex             0.000000
Age            19.865320
SibSp           0.000000
Parch           0.000000
Ticket          0.000000
Fare            0.000000
Cabin          77.104377
Embarked        0.224467
dtype: float64

In [29]:
df.drop(columns=["Cabin"],inplace=True)

In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(4)
memory usage: 76.7+ KB


In [31]:
numerical = df.select_dtypes(include=["int64","float64"])
categorical = df.select_dtypes(include=["object"])

In [32]:
numerical.corr()["Survived"]

PassengerId   -0.005007
Survived       1.000000
Pclass        -0.338481
Age           -0.077221
SibSp         -0.035322
Parch          0.081629
Fare           0.257307
Name: Survived, dtype: float64

In [33]:
delete_col = numerical.columns[numerical.corr()["Survived"]<0.1]
delete_col

Index(['PassengerId', 'Pclass', 'Age', 'SibSp', 'Parch'], dtype='object')

In [34]:
df.drop(columns=delete_col,inplace=True)

In [35]:
df.sample(5)

Unnamed: 0,Survived,Name,Sex,Ticket,Fare,Embarked
534,0,"Cacic, Miss. Marija",female,315084,8.6625,S
718,0,"McEvoy, Mr. Michael",male,36568,15.5,Q
652,0,"Kalvik, Mr. Johannes Halvorsen",male,8475,8.4333,S
462,0,"Gee, Mr. Arthur H",male,111320,38.5,S
271,1,"Tornquist, Mr. William Henry",male,LINE,0.0,S


In [36]:
df.drop(columns=["Name","Ticket"],inplace=True)

In [37]:
df.sample(5)

Unnamed: 0,Survived,Sex,Fare,Embarked
802,1,male,120.0,S
717,1,female,10.5,S
464,0,male,8.05,S
158,0,male,8.6625,S
182,0,male,31.3875,S


In [38]:
df.isnull().sum()

Survived    0
Sex         0
Fare        0
Embarked    2
dtype: int64

In [39]:
X_train = df.iloc[:,1:]
y_train = df["Survived"]
X_train

Unnamed: 0,Sex,Fare,Embarked
0,male,7.2500,S
1,female,71.2833,C
2,female,7.9250,S
3,female,53.1000,S
4,male,8.0500,S
...,...,...,...
886,male,13.0000,S
887,female,30.0000,S
888,female,23.4500,S
889,male,30.0000,C


In [40]:
trans1 = ColumnTransformer(
    transformers=[
        ("impute_cat",SimpleImputer(strategy="most_frequent",add_indicator=True),[2])
    ],remainder="passthrough"
)

trans2 = ColumnTransformer(
    transformers=[
        ("ohe",OneHotEncoder(handle_unknown="ignore",sparse_output=False,drop="first"),slice(0,3))
    ],remainder="passthrough"
)

trans3 = ColumnTransformer(
    transformers=[
        ("scaling",StandardScaler(),slice(0,5))
    ],remainder="passthrough"
)


In [41]:
pipe = Pipeline(
    [
        ("trans1",trans1),
        ("trans2",trans2),
        ("trans3",trans3)
    ]
)
pipe.fit(X_train,y_train)

In [42]:
X_train = pipe.fit_transform(X_train)

In [43]:
X_train.shape

(891, 5)

<br>
<br>

# now make neural network layer

<br>
<br>

In [44]:

relu = tf.keras.activations.relu
sigmoid = tf.keras.activations.sigmoid

model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(5,activation=relu,input_dim=(5)))
model.add(tf.keras.layers.Dense(2,activation=relu))
model.add(tf.keras.layers.Dense(2,activation=relu))
model.add(tf.keras.layers.Dense(1,activation=sigmoid))

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [45]:
loss = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

model.compile(loss=loss,optimizer=optimizer,metrics=["accuracy"])

In [None]:
tf.keras.

In [46]:
model.fit(X_train,y_train,epochs=40,validation_split=0.1)

Epoch 1/40


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - accuracy: 0.6194 - loss: 0.6756 - val_accuracy: 0.6222 - val_loss: 0.6039
Epoch 2/40
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6013 - loss: 0.5993 - val_accuracy: 0.6222 - val_loss: 0.5573
Epoch 3/40
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6058 - loss: 0.5692 - val_accuracy: 0.7667 - val_loss: 0.5434
Epoch 4/40
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7754 - loss: 0.5484 - val_accuracy: 0.7778 - val_loss: 0.5355
Epoch 5/40
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7923 - loss: 0.5214 - val_accuracy: 0.7778 - val_loss: 0.5298
Epoch 6/40
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7664 - loss: 0.5474 - val_accuracy: 0.7778 - val_loss: 0.5335
Epoch 7/40
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7939f824a480>