## ***Import Libraries***

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
from sklearn.linear_model import LogisticRegression 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV , train_test_split
from sklearn.metrics import accuracy_score , classification_report , confusion_matrix
from sklearn.preprocessing import LabelEncoder , MinMaxScaler
import mplcyberpunk
plt.style.use('cyberpunk')


## ***Exploring Dataset***

In [None]:
data = pd.read_csv('Titanic-Dataset.csv')

In [None]:
data.head()

In [None]:
data.info()

In [None]:
data.describe().T

## ***Check Missing Values***

In [None]:
sns.heatmap(data.isna(),yticklabels=False ,cbar=False)

In [None]:
data.isna().sum().sort_values(ascending=False)

In [None]:
# Impute missing ages with the column mean.
# The result is assigned back instead of calling fillna(inplace=True) on the
# selected column: that chained in-place call operates on an intermediate
# Series, emits a FutureWarning on pandas 2.x, and leaves `data` unchanged
# under Copy-on-Write. Assignment also matches how 'Embarked' is filled below.
data['Age'] = data['Age'].fillna(data['Age'].mean())

In [None]:
data.drop('Cabin' , inplace = True ,axis=1)

In [None]:
data['Embarked'] = data['Embarked'].fillna('S')

In [None]:
data.isna().sum().sort_values(ascending=False)

In [None]:
data.dropna(inplace=True)

In [None]:
data.shape

In [None]:
data.head()

## ***Handling Categorical Features***

In [None]:
Sex = pd.get_dummies(data['Sex'],drop_first=True)

In [None]:
Embarked = pd.get_dummies(data['Embarked'],drop_first=True)

In [None]:
data.drop(['Sex' , 'Embarked','Name','Ticket'],axis=1,inplace=True)

In [None]:
data = pd.concat([data,Sex,Embarked],axis=1)

In [None]:
data.head()

## ***Visualizing Dataset After Cleaning***

In [None]:
sns.countplot(x = 'Survived' , data=data)

In [None]:
sns.countplot(x = 'Survived' , data=data ,hue='male')

In [None]:
sns.countplot(x = 'Survived' , data=data ,hue='Pclass')

In [None]:
data['Age'].hist(bins=20)

In [None]:
data['Fare'].hist(bins=40)

In [None]:
sns.boxplot(y='Fare',data=data)

## ***PreProcessing***

In [None]:
# Replace each indicator column with the integer class codes produced by a
# LabelEncoder. The same encoder object is re-fit per column, so after the
# loop it holds the fit for the last column ('S').
le = LabelEncoder()
for dummy_col in ('male', 'Q', 'S'):
    data[dummy_col] = le.fit_transform(data[dummy_col])


In [None]:
# Rescale 'Fare' with a MinMaxScaler fit on the whole column.
# NOTE(review): the scaler is fit before the train/test split further down,
# so its fitted range also reflects rows that end up in the test split —
# confirm this is acceptable.
sc = MinMaxScaler()
fare_column = data['Fare'].to_numpy().reshape(-1, 1)  # single-feature 2-D input
data['Fare'] = sc.fit_transform(fare_column)

In [None]:
plt.figure(figsize=(8,6))
plt.hist(data['Fare'], bins = 50)
plt.title("Fare Distribution")
plt.xlabel('Fare')
plt.ylabel('Frequency')

In [None]:
data.head()

## ***Build Models***

In [None]:
x = data.drop((['Survived' , 'PassengerId']) , axis = 1)
y = data['Survived']

In [None]:
x_train , x_test , y_train , y_test = train_test_split(x , y , test_size=0.2 ,random_state=42)

In [None]:
x_train.shape , x_test.shape , y_train.shape , y_test.shape

## ***Logistic Regression With GridSearchCV***

In [None]:
# Tune a logistic regression over regularisation strength C and penalty type
# with 5-fold cross-validated accuracy on the training split.
#
# solver='liblinear' is required because the grid contains penalty='l1':
# the default 'lbfgs' solver supports only 'l2' (or no) penalty, so with it
# every 'l1' candidate fails to fit and is scored NaN, which silently
# discards half of the grid. 'liblinear' supports both 'l1' and 'l2'.
lr = LogisticRegression(solver='liblinear', random_state=42)
param_grid_lr = {
    'C': [100, 10, 1.0, 0.1, 0.01],
    'penalty': ['l1', 'l2'],
}

lr_grid_model = GridSearchCV(lr, param_grid=param_grid_lr, scoring='accuracy', cv=5)
lr_grid_model.fit(x_train, y_train)
print("Best Parameters: ", lr_grid_model.best_params_)
# Keep the estimator re-fit with the best parameters for later evaluation.
Best_Logistic_Regression_Model = lr_grid_model.best_estimator_



In [None]:
# best_score_ is the mean cross-validated accuracy of the best parameter
# setting, not accuracy measured on the training set, so label it as such.
print("Mean Cross-Validated Accuracy: {:.2f}".format(lr_grid_model.best_score_ * 100))

In [None]:
# Score of the tuned logistic regression on the held-out test split,
# printed as a percentage.
LR_Grid_Score = Best_Logistic_Regression_Model.score(X=x_test, y=y_test)
lr_test_percent = LR_Grid_Score * 100
print("Accuracy By Best Logistic Regression Model:", lr_test_percent)

In [None]:
# Confusion matrix of the tuned logistic regression on the test split.
lr_grid_y_pred = Best_Logistic_Regression_Model.predict(x_test)
conf_matrix = confusion_matrix(y_test, lr_grid_y_pred)
# fmt='d' prints the integer counts verbatim; the default '.2g' format
# renders any count of 100 or more in scientific notation (e.g. '1e+02').
sns.heatmap(conf_matrix, annot=True, fmt='d')
# confusion_matrix puts true labels on rows and predicted labels on columns.
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.title("Confusion Matrix For Best Logistic Regression", fontsize=14)

## ***RandomForest With GridSearchCV***

In [None]:
# Exhaustive 3-fold grid search over random forest hyper-parameters
# (3 * 5 * 4 * 3 * 3 * 2 * 1 = 1080 candidate settings), run on all cores.
rf = RandomForestClassifier(random_state=42)
param_grid_rf = dict(
    n_estimators=[10, 100, 1000],
    max_depth=[15, 20, 40, 80, 90],
    max_features=[0.5, 1, 2, 3],
    min_samples_split=[8, 10, 12],
    min_samples_leaf=[5, 10, 15],
    criterion=['gini', 'entropy'],
    bootstrap=[True],
)

rf_grid_model = GridSearchCV(
    estimator=rf,
    param_grid=param_grid_rf,
    cv=3,
    n_jobs=-1,
    verbose=1,
)
rf_grid_model.fit(x_train, y_train)
print("Best Parameters: ", rf_grid_model.best_params_)
# Keep the estimator re-fit with the best parameters for later evaluation.
Best_RandomForest_Model = rf_grid_model.best_estimator_



In [None]:
# best_score_ is the mean cross-validated accuracy of the best parameter
# setting, not accuracy measured on the training set, so label it as such.
print("Mean Cross-Validated Accuracy: {:.2f}".format(rf_grid_model.best_score_ * 100))

In [None]:
# Score of the tuned random forest on the held-out test split,
# printed as a percentage.
RF_Grid_Score = Best_RandomForest_Model.score(X=x_test, y=y_test)
rf_test_percent = RF_Grid_Score * 100
print("Accuracy By Best RandomForest Model:", rf_test_percent)

In [None]:
# Confusion matrix of the tuned random forest on the test split.
rf_grid_y_pred = Best_RandomForest_Model.predict(x_test)
conf_matrix = confusion_matrix(y_test, rf_grid_y_pred)
# fmt='d' prints the integer counts verbatim; the default '.2g' format
# renders any count of 100 or more in scientific notation (e.g. '1e+02').
sns.heatmap(conf_matrix, annot=True, fmt='d')
# confusion_matrix puts true labels on rows and predicted labels on columns.
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.title("Confusion Matrix For Best RandomForest", fontsize=14)

In [None]:
# Fit a single decision tree with fixed (untuned) hyper-parameters
# on the training split.
dt_settings = dict(
    max_depth=10,
    criterion='gini',
    min_samples_split=4,
    min_samples_leaf=4,
    max_features=None,
    random_state=42,
)
decision_tree_model = DecisionTreeClassifier(**dt_settings)
decision_tree_model.fit(x_train, y_train)

In [None]:
# Accuracy of the decision tree on the rows it was fit on.
y_train_pred_dt = decision_tree_model.predict(x_train)
accuracy_train_dt = accuracy_score(y_true=y_train, y_pred=y_train_pred_dt)
dt_train_percent = accuracy_train_dt * 100
print("Accuracy on Training Set: {:.2f}".format(dt_train_percent))

In [None]:
# Score of the decision tree on the held-out test split,
# printed as a percentage.
DT_Score = decision_tree_model.score(X=x_test, y=y_test)
dt_test_percent = DT_Score * 100
print("Accuracy By Decision Tree Model:", dt_test_percent)

In [None]:
# Bar chart comparing the three models' accuracy on the training split.
model_names = ['Logistic Regression', 'RandomForest', 'Decision Tree']
# Score every model the same way (best estimator evaluated on x_train).
# Using GridSearchCV.best_score_ for the first two would plot mean
# cross-validated accuracy next to the decision tree's true training
# accuracy, which is not a like-for-like comparison under this title.
train_accuracies = [
    lr_grid_model.best_estimator_.score(x_train, y_train),
    rf_grid_model.best_estimator_.score(x_train, y_train),
    accuracy_train_dt,
]
plt.figure(figsize=(10, 6))
plt.bar(model_names, train_accuracies, label='Training Accuracy')
plt.xlabel('Models')
plt.ylabel('Accuracy')
plt.title('Training Accuracy of Models')
plt.legend()
plt.show()


In [None]:
# Bar chart comparing the three models' accuracy on the held-out test split.
test_fig, test_ax = plt.subplots(figsize=(10, 6))
test_accuracies = [LR_Grid_Score, RF_Grid_Score, DT_Score]
test_ax.bar(model_names, test_accuracies, label='Testing Accuracy')

test_ax.set_xlabel('Models')
test_ax.set_ylabel('Accuracy')
test_ax.set_title('Testing Accuracy of Models')
test_ax.legend()

plt.show()