#### Import Required Libraries

In [None]:
!pip install pdf2image

In [None]:
import os
import cv2
import numpy as np
import joblib
import matplotlib.pyplot as plt
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
import pydicom
from pdf2image import convert_from_path

#### Feature Extraction: HOG + Canny

In [None]:
def extract_hog_canny_features(img_path, visualize=True):
    """Extract a combined HOG + Canny edge feature vector from an image.

    The image is read as grayscale and resized to 600x600 so that every
    image produces a feature vector of the same length.

    Args:
        img_path: Path of the image file to read with OpenCV.
        visualize: When true, show the resized image, the HOG rendering and
            the Canny edge map side by side with matplotlib.

    Returns:
        A 1-D NumPy array containing the HOG descriptor followed by the
        flattened Canny edge map, or ``None`` if the image cannot be read.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread reports an unreadable file by returning None, not by raising.
    if img is None:
        print(f"⚠️ Warning: Unable to read image {img_path}")
        return None  # Caller is expected to skip this image

    img = cv2.resize(img, (600, 600))  # Fixed size keeps feature length constant

    # 1️⃣ HOG descriptor
    hog_params = dict(orientations=12, pixels_per_cell=(4, 4), cells_per_block=(2, 2))
    if visualize:
        hog_features, hog_image = hog(img, visualize=True, **hog_params)
    else:
        # The HOG rendering is only needed for plotting; skipping it avoids
        # building an unused image for every file during bulk extraction.
        hog_features = hog(img, **hog_params)
        hog_image = None

    # 2️⃣ Canny edge map (same shape as the resized image)
    canny_edges = cv2.Canny(img, threshold1=30, threshold2=100)

    # 3️⃣ Final feature vector: HOG descriptor followed by flattened edges
    combined_features = np.concatenate((hog_features, canny_edges.flatten()))

    if visualize:
        fig, axes = plt.subplots(1, 3, figsize=(12, 4))
        axes[0].imshow(img, cmap='gray')
        axes[0].set_title("Original Image")

        axes[1].imshow(hog_image, cmap='gray')
        axes[1].set_title("HOG Features")

        axes[2].imshow(canny_edges, cmap='gray')
        axes[2].set_title("Canny Edges")

        plt.show()

    return combined_features

#### Test on a Sample Chest X-ray Image

In [None]:
# Visual check: plot the original image, HOG rendering and Canny edges for one
# file taken from the chest_xrays1000 folder.
sample_image = r"C:\Users\abhin\OneDrive\MSDS_Northwestern_University\Courses\MSDSP_498_Capstone\Lung_diseases_detection\chest_xray_detector\chest_xrays1000\00028175_000.png"  # Change to actual image path
extract_hog_canny_features(sample_image, visualize=True)

#### Test on Sample Non-Chest X-ray Images

In [None]:
# Visual check: plot the original image, HOG rendering and Canny edges for one
# file taken from the non_xrays folder.
sample_image = r"C:\Users\abhin\OneDrive\MSDS_Northwestern_University\Courses\MSDSP_498_Capstone\Lung_diseases_detection\chest_xray_detector\non_xrays\Te-gl_0050.jpg"  # Change to actual image path
extract_hog_canny_features(sample_image, visualize=True)

In [None]:
# Visual check: plot the original image, HOG rendering and Canny edges for one
# file taken from the non_chestxrays1000 folder.
sample_image = r"C:\Users\abhin\OneDrive\MSDS_Northwestern_University\Courses\MSDSP_498_Capstone\Lung_diseases_detection\chest_xray_detector\non_chestxrays1000\Image_5_15.jpg"  # Change to actual image path
extract_hog_canny_features(sample_image, visualize=True)

####  Extract Features from Chest X-rays & Non-X-rays

In [None]:
# Define dataset paths
chest_xray_dir = r"C:\Users\abhin\OneDrive\MSDS_Northwestern_University\Courses\MSDSP_498_Capstone\Lung_diseases_detection\chest_xray_detector\chest_xrays1000"
non_xray_dir = r"C:\Users\abhin\OneDrive\MSDS_Northwestern_University\Courses\MSDSP_498_Capstone\Lung_diseases_detection\chest_xray_detector\non_chestxrays1000"

# File extensions (lower-case) that are treated as images
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def _extract_features_from_dir(image_dir):
    """Return the HOG + Canny feature vector of every readable image in image_dir.

    Files without an image extension, and images that fail to load, are skipped.
    """
    collected = []
    # os.listdir returns names in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded train/test split) reproducible.
    for img_name in sorted(os.listdir(image_dir)):
        if not img_name.lower().endswith(IMAGE_EXTENSIONS):
            continue
        img_path = os.path.join(image_dir, img_name)
        features = extract_hog_canny_features(img_path, visualize=False)
        if features is not None:  # None means the image could not be read
            collected.append(features)
    return collected


chest_xray_features = _extract_features_from_dir(chest_xray_dir)
non_xray_features = _extract_features_from_dir(non_xray_dir)

# Validate dataset size
print("Extracted", len(chest_xray_features), "chest X-ray features")
print("Extracted", len(non_xray_features), "non-X-ray features")

# Both classes are required to train the classifier
if not chest_xray_features or not non_xray_features:
    raise ValueError("Feature extraction failed. Check image paths and dataset structure.")


#### Train a Random Forest Model on HOG + Canny Features

In [None]:
# Build the design matrix: chest X-ray rows first, then non-X-ray rows
X = np.asarray([*chest_xray_features, *non_xray_features])
# Labels in the same order as the rows: 1 = Chest X-ray, 0 = Non-X-ray
y = np.repeat([1, 0], [len(chest_xray_features), len(non_xray_features)])

# Hold out 20% of the samples for evaluation (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion
rf_model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Persist the fitted model to disk
joblib.dump(rf_model, "chest_xray_rf_model_v2_hybrid.pkl")
print("Random Forest Model saved successfully!")

#### Model Evaluation

In [None]:
# Reload the persisted model and predict on the held-out split
rf_model = joblib.load("chest_xray_rf_model_v2_hybrid.pkl")
y_pred = rf_model.predict(X_test)

# Headline metrics, all computed against the held-out labels
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1-score: {f1:.4f}")

# Per-class breakdown
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Confusion matrix heatmap (rows = actual class, columns = predicted class)
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(6, 5))
ax = sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=["Non-X-ray", "Chest X-ray"],
    yticklabels=["Non-X-ray", "Chest X-ray"],
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()


### Predict New Images Using the Hybrid Model

#### Convert DICOM images to PNG

In [None]:
def convert_dicom_to_image(dicom_path):
    """Convert a DICOM (.dcm) file to an 8-bit PNG written next to it.

    Args:
        dicom_path: Path of the DICOM file to read.

    Returns:
        Path of the PNG that was written (the input path with its extension
        replaced by ``.png``), or ``None`` if reading, converting or writing
        failed. Failures are reported with a printed message, not raised.
    """
    try:
        dicom_data = pydicom.dcmread(dicom_path)  # Read DICOM file
        # Work in float so the subtraction below cannot wrap around on
        # integer pixel data.
        image_array = dicom_data.pixel_array.astype(np.float64)

        # Min-max normalise to [0, 1] (DICOMs often have high bit depths)
        lowest = image_array.min()
        value_range = image_array.max() - lowest
        if value_range > 0:
            image_array = (image_array - lowest) / value_range
        else:
            # Constant image: avoid 0/0 and emit an all-black frame instead
            image_array = np.zeros_like(image_array)
        image_array = (image_array * 255).astype(np.uint8)  # Convert to 8-bit image

        # Replace only the file extension, whatever its letter case, and
        # leave any ".dcm" elsewhere in the path untouched.
        img_path = os.path.splitext(dicom_path)[0] + ".png"
        # cv2.imwrite returns False when the file could not be written
        if not cv2.imwrite(img_path, image_array):
            raise OSError(f"could not write {img_path}")
        print(f"✅ DICOM converted to image: {img_path}")

        return img_path
    except Exception as e:
        # Deliberate best-effort boundary: callers treat None as "conversion failed"
        print(f"⚠️ Error converting DICOM to image: {str(e)}")
        return None


#### Predict New Images

In [None]:
def classify_image(file_path, model_path="chest_xray_rf_model_v2_hybrid.pkl"):
    """Classify an image or DICOM file as chest X-ray or not using HOG + Canny features.

    Args:
        file_path: Path to a ``.png``, ``.jpg``, ``.jpeg`` or ``.dcm`` file.
            DICOM files are first converted to PNG.
        model_path: Path of the joblib-serialised classifier to load. Defaults
            to the file name this notebook saves after training.

    Returns:
        A ``(label, confidence)`` tuple. On success ``label`` is
        ``"Chest X-ray"`` or ``"Non-Chest X-ray"`` and ``confidence`` is the
        model's predicted probability for the chest X-ray class. On failure
        ``label`` is an error message and ``confidence`` is NaN, so callers
        that unpack two values keep working.
    """
    no_confidence = float("nan")

    # Step 1: Determine file type
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == ".dcm":
        # Convert DICOM to standard image
        img_path = convert_dicom_to_image(file_path)
    elif file_extension in (".png", ".jpg", ".jpeg"):
        img_path = file_path  # It's already an image
    else:
        print("❌ Unsupported file format:", file_extension)
        return "Invalid file type", no_confidence

    # If conversion failed, return error
    if img_path is None:
        return "Image extraction/conversion failed", no_confidence

    # Step 2: Load the trained model
    # NOTE(review): the default used to be "chest_xray_rf_model_hybrid.pkl",
    # which nothing in this notebook writes; pass model_path explicitly if
    # that older file is really the one intended.
    rf_model = joblib.load(model_path)

    # Step 3: Extract HOG + Canny features from the uploaded image
    features = extract_hog_canny_features(img_path)
    if features is None:
        return "❌ Failed to extract features from image", no_confidence

    features = np.array(features).reshape(1, -1)  # Single-sample 2-D input

    # Step 4: Get prediction
    prediction = rf_model.predict(features)
    confidence = rf_model.predict_proba(features)[0][1]  # Probability of "Chest X-ray"

    label = "Chest X-ray" if prediction[0] == 1 else "Non-Chest X-ray"
    return label, confidence


#### Test on Sample Images

#### PNG/JPG images of chest X-rays

In [None]:
# Classify one PNG from test_files and print the predicted label and confidence.
test_image = r"C:\Users\abhin\OneDrive\MSDS_Northwestern_University\Courses\MSDSP_498_Capstone\Lung_diseases_detection\chest_xray_detector\test_files\CHNCXR_0029_0.png"  # Change this to an image path
result, conf = classify_image(test_image)
print(f"Prediction: {result}, Confidence: {conf:.2f}")

In [None]:
# Classify one PNG from test_files and print the predicted label and confidence.
test_image = r"C:\Users\abhin\OneDrive\MSDS_Northwestern_University\Courses\MSDSP_498_Capstone\Lung_diseases_detection\chest_xray_detector\test_files\CHNCXR_0030_0.png"  # Change this to an image path
result, conf = classify_image(test_image)
print(f"Prediction: {result}, Confidence: {conf:.2f}")

In [None]:
# Classify one PNG from test_files and print the predicted label and confidence.
test_image = r"C:\Users\abhin\OneDrive\MSDS_Northwestern_University\Courses\MSDSP_498_Capstone\Lung_diseases_detection\chest_xray_detector\test_files\CHNCXR_0031_0.png"  # Change this to an image path
result, conf = classify_image(test_image)
print(f"Prediction: {result}, Confidence: {conf:.2f}")

#### PNG/JPG images of non-chest X-rays

In [None]:
# Classify one JPG from test_files and print the predicted label and confidence.
test_image = r"C:\Users\abhin\OneDrive\MSDS_Northwestern_University\Courses\MSDSP_498_Capstone\Lung_diseases_detection\chest_xray_detector\test_files\Te-gl_0181.jpg"  # Change this to an image path
result, conf = classify_image(test_image)
print(f"Prediction: {result}, Confidence: {conf:.2f}")

In [None]:
# Classify one JPG from test_files and print the predicted label and confidence.
test_image = r"C:\Users\abhin\OneDrive\MSDS_Northwestern_University\Courses\MSDSP_498_Capstone\Lung_diseases_detection\chest_xray_detector\test_files\Image_87_16.jpg"  # Change this to an image path
result, conf = classify_image(test_image)
print(f"Prediction: {result}, Confidence: {conf:.2f}")

#### Test on DICOM images of chest X-rays

In [None]:
# Classify one DICOM (.dcm) file from test_files and print the predicted label
# and confidence.
test_image = r"C:\Users\abhin\OneDrive\MSDS_Northwestern_University\Courses\MSDSP_498_Capstone\Lung_diseases_detection\chest_xray_detector\test_files\00c0f4e4-ac8b-4dea-8231-802f6b9860dc.dcm"  # Change this to a DICOM file path
result, conf = classify_image(test_image)
print(f"Prediction: {result}, Confidence: {conf:.2f}")

In [None]:
# Classify one DICOM (.dcm) file from test_files and print the predicted label
# and confidence.
test_image = r"C:\Users\abhin\OneDrive\MSDS_Northwestern_University\Courses\MSDSP_498_Capstone\Lung_diseases_detection\chest_xray_detector\test_files\00c67fbc-35f6-4058-88b4-4bfad19014ec.dcm"  # Change this to a DICOM file path
result, conf = classify_image(test_image)
print(f"Prediction: {result}, Confidence: {conf:.2f}")