# Logistic Regression from Scratch on Titanic Dataset

# In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the raw passenger table from the CSV in the working directory.
titan = pd.read_csv("titanic.csv")

# Keep the label plus the six features used by the model. The explicit
# .copy() makes `titan` an independent frame, so the column assignments
# below modify it directly rather than a selection of the loaded table.
titan = titan[["Survived", "Pclass", "Sex", "Age", "SibSp", "Parch", "Fare"]].copy()

# Encode sex numerically; any value other than "male"/"female" becomes NaN.
titan["Sex"] = titan["Sex"].map({"male": 0, "female": 1})

# Fill missing values with the column median. The filled column is assigned
# back instead of calling fillna(inplace=True) on a column selection, which
# pandas deprecates and which does not update the frame under copy-on-write.
titan["Age"] = titan["Age"].fillna(titan["Age"].median())
titan["Fare"] = titan["Fare"].fillna(titan["Fare"].median())

from sklearn.model_selection import train_test_split
# Feature matrix: every column except the label.
X = titan.drop("Survived", axis=1).values
# Labels as a column vector of shape (n_samples, 1).
y = titan["Survived"].values.reshape(-1, 1)
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

class LogisticRegressionScratch:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    lr : float
        Step size applied to every gradient update.
    n_iters : int
        Number of gradient steps performed by ``fit``.

    After ``fit``, ``weights`` is an array of shape ``(n_features, 1)`` and
    ``bias`` is a scalar; both are ``None`` until then.
    """

    def __init__(self, lr=0.01, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        Only non-positive values are ever exponentiated, so large-magnitude
        inputs cannot overflow ``np.exp`` (the naive formula emits a
        RuntimeWarning for very negative ``z``).
        """
        z = np.asarray(z)
        # e is exp(-z) for z >= 0 and exp(z) for z < 0; always within [0, 1].
        e = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
        return np.where(z >= 0, 1.0, e) / (1 + e)

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Feature matrix.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Binary labels.

        Raises
        ------
        ValueError
            If ``y`` does not contain exactly one label per row of ``X``.
        """
        X = np.asarray(X)
        # Force the labels into a column. A 1-D y would otherwise broadcast
        # against the (n_samples, 1) predictions into an (n, n) matrix.
        y = np.asarray(y).reshape(-1, 1)

        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        self.weights = np.zeros((n_features, 1))
        self.bias = 0

        for _ in range(self.n_iters):
            linear_model = np.dot(X, self.weights) + self.bias
            y_predicted = self.sigmoid(linear_model)

            # Gradient of the mean log-loss with respect to weights and bias.
            error = y_predicted - y
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """Return 0/1 labels of shape ``(n_samples, 1)`` for the rows of ``X``.

        A row is labelled 1 when its predicted probability is strictly
        greater than 0.5. ``fit`` must have been called first.
        """
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self.sigmoid(linear_model)
        return np.where(y_predicted > 0.5, 1, 0)

# Build the classifier (2000 gradient steps at learning rate 0.01), train it
# on the training split, then label the held-out rows.
model = LogisticRegressionScratch(n_iters=2000, lr=0.01)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

from sklearn.metrics import confusion_matrix

# Count actual-vs-predicted label pairs on the test split.
cm = confusion_matrix(y_test, predictions)

# Draw the counts as a heat map with a colour scale beside it.
heatmap = plt.imshow(cm, cmap="Blues")
plt.colorbar(heatmap)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

# Histogram of passenger ages in 20 bins. The data lives in `titan`; the
# name `df` is never defined in this script and raised a NameError here.
plt.hist(titan["Age"], bins=20, color='skyblue', edgecolor='black')
plt.title("Age Distribution")
plt.xlabel("Age")
plt.ylabel("Count")
plt.show()

# Percentage of test rows whose predicted label matches the true label.
is_correct = predictions == y_test
accuracy = is_correct.mean() * 100
print("Accuracy:", round(accuracy, 2), "%")
