<a href="https://colab.research.google.com/github/zainabkhalid90/Titanic-Classification/blob/main/TITANIC_CLASSIFIER.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [13]:
# Read the Titanic passenger records from CSV into a DataFrame
file_path = '/content/Titanic-Dataset.csv'  # Change this if the file is stored elsewhere
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the top five records to sanity-check the load
df.head(n=5)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [14]:
# Fill missing values.
# The filled column is assigned back to `df` rather than calling
# `df[col].fillna(..., inplace=True)`: that form mutates an intermediate
# Series, emits a FutureWarning on recent pandas 2.x, and leaves `df`
# unchanged under Copy-on-Write (the default from pandas 3.0).
# NOTE(review): the median/mode are computed on the full dataset, before the
# train/test split performed later in the notebook, so test rows influence
# the imputed values -- consider imputing after the split.
df['Age'] = df['Age'].fillna(df['Age'].median())
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
df['Fare'] = df['Fare'].fillna(df['Fare'].median())

# Convert categorical columns to numerical.
# One encoder per column, so each keeps its own `classes_` mapping and can
# still decode (`inverse_transform`) or encode new values for that column.
sex_encoder = LabelEncoder()
embarked_encoder = LabelEncoder()
df['Sex'] = sex_encoder.fit_transform(df['Sex'])
df['Embarked'] = embarked_encoder.fit_transform(df['Embarked'])

# Backward-compatible alias: the original single encoder ended up fitted on
# 'Embarked' (its last fit), so `label_encoder` keeps pointing at that state.
label_encoder = embarked_encoder

# Drop irrelevant columns.
# `errors='ignore'` keeps the cell re-runnable: on a second run the columns
# are already gone and a plain drop would raise KeyError.
df = df.drop(columns=['Name', 'Ticket', 'Cabin', 'PassengerId'], errors='ignore')

# Display the updated dataset
df.head()


Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,1,22.0,1,0,7.25,2
1,1,1,0,38.0,1,0,71.2833,0
2,1,3,0,26.0,0,0,7.925,2
3,1,1,0,35.0,1,0,53.1,2
4,0,3,1,35.0,0,0,8.05,2


In [15]:
# Separate the target column from the predictor columns
y = df['Survived']
X = df.drop(columns=['Survived'])

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Show the shape of each of the four pieces
tuple(part.shape for part in (X_train, X_test, y_train, y_test))


((712, 7), (179, 7), (712,), (179,))

In [16]:
# Standardize the features: learn the scaling statistics from the training
# split only, then apply that same fitted scaler to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Peek at the first five standardized training rows
X_train_scaled[:5]


array([[-1.61413602,  0.7243102 ,  1.25364106, -0.47072241, -0.47934164,
        -0.07868358,  0.5635246 ],
       [-0.40055118,  0.7243102 , -0.47728355, -0.47072241, -0.47934164,
        -0.37714494,  0.5635246 ],
       [ 0.81303367,  0.7243102 ,  0.21508629, -0.47072241, -0.47934164,
        -0.47486697,  0.5635246 ],
       [ 0.81303367,  0.7243102 , -0.24649361,  0.37992316, -0.47934164,
        -0.47623026,  0.5635246 ],
       [ 0.81303367, -1.38062393, -1.78509326,  2.93185988,  2.04874166,
        -0.02524937,  0.5635246 ]])

In [17]:
# Fit a 100-tree random forest on the standardized training features
# (seeded so repeated runs build the same ensemble)
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train_scaled, y_train)

# Echo the fitted estimator as the cell output
model


In [18]:
# Score the held-out split: compute every metric first, then print them
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

print(f'Accuracy: {accuracy * 100:.2f}%')
print("\nClassification Report:\n", report)
print("\nConfusion Matrix:\n", matrix)


Accuracy: 82.12%

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.88      0.85       105
           1       0.81      0.74      0.77        74

    accuracy                           0.82       179
   macro avg       0.82      0.81      0.81       179
weighted avg       0.82      0.82      0.82       179


Confusion Matrix:
 [[92 13]
 [19 55]]


In [19]:
# Example: Predicting the survival of a new passenger.
# The passenger is built as a one-row DataFrame with named columns rather than
# a bare positional array: the scaler was fitted on a DataFrame, so passing an
# unnamed array triggers scikit-learn's feature-name warning and silently
# depends on getting the column order right.
# Encodings match the preprocessed preview earlier in the notebook:
#   Sex:      0 = female, 1 = male
#   Embarked: 0 = C, 2 = S (presumably 1 = Q -- LabelEncoder sorts classes)
new_passenger = pd.DataFrame(
    [
        {
            'Pclass': 3,
            'Sex': 0,
            'Age': 22.0,
            'SibSp': 1,
            'Parch': 0,
            'Fare': 7.25,
            'Embarked': 2,
        }
    ]
)
# Reorder to the training feature order; raises KeyError if a feature is missing.
new_passenger = new_passenger[list(X.columns)]

new_passenger_scaled = scaler.transform(new_passenger)
prediction = model.predict(new_passenger_scaled)
print("Prediction (0 = Not Survived, 1 = Survived):", prediction[0])


Prediction (0 = Not Survived, 1 = Survived): 1


