<a href="https://colab.research.google.com/github/yashkapur0403/Neural-Networks-Practise/blob/main/XGBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:

from google.colab import drive
import zipfile, os, cv2
import numpy as np

drive.mount('/content/drive')  # allow permission
zip_path = "/content/drive/My Drive/Colab Notebooks/chest_xray.zip"
extract_path = "/content/chest_xray"
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# STEP 2: Load images and prepare data
def load_images_from_folder(folder_path, label):
    data = []
    for filename in os.listdir(folder_path):
        path = os.path.join(folder_path, filename)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is not None:
            img = cv2.resize(img, (64, 64))
            img_flat = img.flatten() / 255.0
            data.append((img_flat, label))
    return data

normal_data = load_images_from_folder("/content/chest_xray/chest_xray/train/NORMAL", 0)
pneumonia_data = load_images_from_folder("/content/chest_xray/chest_xray/train/PNEUMONIA", 1)
all_data = normal_data + pneumonia_data

X_train = np.array([x for x, _ in all_data])
y_train = np.array([y for _, y in all_data])

normal_test = load_images_from_folder("/content/chest_xray/chest_xray/test/NORMAL", 0)
pneumonia_test = load_images_from_folder("/content/chest_xray/chest_xray/test/PNEUMONIA", 1)
all_test = normal_test + pneumonia_test

X_test = np.array([x for x, _ in all_test])
y_test = np.array([y for _, y in all_test])

Mounted at /content/drive


In [None]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

class XGBoostSimple:
    def __init__(self, n_estimators=50, learning_rate=0.1, reg_lambda=0.1):
        self.n_estimators = n_estimators
        self.lr = learning_rate
        self.reg_lambda = reg_lambda
        self.models = []

    def fit(self, X, y):
        n_samples, n_features = X.shape
        pos_ratio = np.clip(np.mean(y), 1e-6, 1 - 1e-6)
        F = np.full(n_samples, np.log(pos_ratio / (1 - pos_ratio)))

        for _ in range(self.n_estimators):
            prob = sigmoid(F)
            g = prob - y
            h = prob * (1 - prob)

            best_feat, best_thresh, best_gain = None, None, -np.inf

            for feat in range(n_features):
                thresholds = np.linspace(X[:, feat].min(), X[:, feat].max(), 10)[1:-1]
                for t in thresholds:
                    left = X[:, feat] <= t
                    right = ~left

                    if left.sum() == 0 or right.sum() == 0:
                        continue

                    G_L = np.sum(g[left])
                    H_L = np.sum(h[left])
                    G_R = np.sum(g[right])
                    H_R = np.sum(h[right])
                    G = np.sum(g)
                    H = np.sum(h)

                    gain = 0.5 * (
                        (G_L ** 2) / (H_L + self.reg_lambda) +
                        (G_R ** 2) / (H_R + self.reg_lambda) -
                        (G ** 2) / (H + self.reg_lambda)
                    )

                    if gain > best_gain:
                        best_feat = feat
                        best_thresh = t
                        best_gain = gain

            left_idx = X[:, best_feat] <= best_thresh
            right_idx = ~left_idx

            G_L = np.sum(g[left_idx])
            H_L = np.sum(h[left_idx])
            G_R = np.sum(g[right_idx])
            H_R = np.sum(h[right_idx])

            gamma_L = -G_L / (H_L + self.reg_lambda)
            gamma_R = -G_R / (H_R + self.reg_lambda)

            F[left_idx] += self.lr * gamma_L
            F[right_idx] += self.lr * gamma_R

            self.models.append((best_feat, best_thresh, gamma_L, gamma_R))

    def predict_proba(self, X):
        F = np.zeros(X.shape[0])
        for feat, thresh, gamma_L, gamma_R in self.models:
            left = X[:, feat] <= thresh
            right = ~left
            F[left] += self.lr * gamma_L
            F[right] += self.lr * gamma_R
        return sigmoid(F)

    def predict(self, X):
        return (self.predict_proba(X) >= 0.5).astype(int)


In [None]:
from sklearn.metrics import accuracy_score

model = XGBoostSimple(n_estimators=100, learning_rate=0.1)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))

print("Train Accuracy:", accuracy_score(y_train, model.predict(X_train)))

Test Accuracy: 0.7852564102564102
Train Accuracy: 0.9173062164236377


In [4]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

model = XGBClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Sklearn XGBoost Test Accuracy:", accuracy_score(y_test, y_pred))


Sklearn XGBoost Test Accuracy: 0.7275641025641025
