# In [None]:
"""
Small-Scale Implementation & Validation for AI-Powered ECG Interpreter
=========================================================================

This script performs the following:
1. Loads a PDF file containing sample ECG graphs and displays the images.
2. Generates synthetic ECG data (simulated signals with arrhythmia labels) and performs EDA.
3. Implements a simple 1D CNN model to classify the synthetic ECG signals.

Required libraries:
  - pdf2image
  - matplotlib
  - numpy
  - pandas
  - scikit-learn
  - tensorflow (or keras)
  
To install dependencies, run:
  pip install pdf2image matplotlib numpy pandas scikit-learn tensorflow

Note: Ensure you have poppler installed on your system for pdf2image to work.
(For Windows, download poppler binaries; on Linux, install via package manager.)
"""

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pdf2image import convert_from_path
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dense, Flatten, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# -----------------------------
# PART 1: Basic Visualization of ECG PDF Graphs
# -----------------------------

# Location of the sample ECG PDF; the script expects 'sample_ecg.pdf' in the working directory.
pdf_file = 'sample_ecg.pdf'

if not os.path.exists(pdf_file):
    # Nothing to render: report the missing file and carry on with the rest of the script.
    print(f"PDF file '{pdf_file}' not found. Please ensure it exists in the working directory.")
else:
    # Rasterise every PDF page into an image (list of PIL images)
    pages = convert_from_path(pdf_file, dpi=150)
    n_pages = len(pages)
    print(f"Converted {n_pages} pages from the PDF.")

    # Stack the pages vertically, one subplot row per page
    plt.figure(figsize=(10, 8))
    for page_no, page_img in enumerate(pages, start=1):
        plt.subplot(n_pages, 1, page_no)
        plt.imshow(page_img)
        plt.axis('off')
        plt.title(f"ECG Graph - Page {page_no}")
    plt.tight_layout()
    plt.show()

# -----------------------------
# PART 2: Simple EDA on Synthetic ECG Data
# -----------------------------
# For demonstration, we'll generate synthetic ECG-like signals.
# In practice, you would load real ECG data (e.g., from CSV, WFDB, etc.)

def generate_synthetic_ecg(n_samples=1000, timesteps=500, noise_level=0.1,
                           arrhythmia_prob=0.3, seed=None):
    """
    Generate a labelled set of synthetic ECG-like signals.

    Every signal is a 5 Hz sine wave sampled at ``timesteps`` evenly spaced
    points on [0, 1] with additive Gaussian noise. With probability
    ``arrhythmia_prob`` a sample is labelled as arrhythmia: a randomly placed
    run of up to 20 consecutive points is scaled by a random factor drawn
    from [1.5, 2.0).

    Parameters
    ----------
    n_samples : int
        Number of signals to generate (must be >= 0).
    timesteps : int
        Number of points per signal (must be >= 1).
    noise_level : float
        Standard deviation of the Gaussian noise added to each signal.
    arrhythmia_prob : float
        Probability that a sample receives the irregular segment (label 1).
    seed : int or None
        If given, a private ``np.random.RandomState(seed)`` is used so the
        output is reproducible. If None, NumPy's global random state is used,
        exactly as before this parameter existed.

    Returns
    -------
    X : np.ndarray of shape (n_samples, timesteps)
        The generated signals.
    y : np.ndarray of shape (n_samples,)
        Integer labels: 0 = normal, 1 = arrhythmia.

    Raises
    ------
    ValueError
        If ``n_samples`` is negative or ``timesteps`` is smaller than 1.
    """
    if n_samples < 0:
        raise ValueError("n_samples must be non-negative")
    if timesteps < 1:
        raise ValueError("timesteps must be at least 1")

    # The legacy RandomState API mirrors the module-level np.random functions,
    # so both sources can be driven through the same calls below.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # The clean waveform is identical for every sample: compute it once.
    t = np.linspace(0, 1, timesteps)
    base = np.sin(2 * np.pi * 5 * t)  # 5 Hz sine wave simulating heart beats

    # The irregular segment starts at least `margin` points from either end.
    # For timesteps > 200 this is the original fixed margin of 100; for
    # shorter signals it shrinks so the start-index range is never empty.
    margin = min(100, (timesteps - 1) // 2)

    X = np.empty((n_samples, timesteps))
    y = np.zeros(n_samples, dtype=int)
    for i in range(n_samples):
        # Draw order (noise, class decision, position, factor) is kept as in
        # the original so a given global seed still yields the same data.
        ecg = base + noise_level * rng.randn(timesteps)

        if rng.rand() < arrhythmia_prob:
            idx = rng.randint(margin, timesteps - margin)
            # Slicing clamps at the end of the signal for short inputs.
            ecg[idx:idx + 20] *= rng.uniform(1.5, 2.0)
            y[i] = 1  # Arrhythmia

        X[i] = ecg

    return X, y

# Build the synthetic dataset used by the rest of the script
X, y = generate_synthetic_ecg(n_samples=1000, timesteps=500, noise_level=0.1)
print("Synthetic ECG dataset shape:", X.shape)

# Show the first four signals in a 2x2 grid, each titled with its class
plt.figure(figsize=(12, 6))
for panel, (signal, label) in enumerate(zip(X[:4], y[:4]), start=1):
    label_name = 'Arrhythmia' if label == 1 else 'Normal'
    plt.subplot(2, 2, panel)
    plt.plot(signal, color='darkblue')
    plt.title(f"Sample ECG Signal - Label: {label_name}")
    plt.xlabel("Time")
    plt.ylabel("Amplitude")
plt.tight_layout()
plt.show()

# Simple EDA: histogram of the per-signal mean amplitude
df = pd.DataFrame(X)
per_signal_mean = df.mean(axis=1)  # computed before the column is added, so it covers signal points only
df['mean_signal'] = per_signal_mean
plt.figure(figsize=(8, 4))
plt.hist(df['mean_signal'], bins=30, color='skyblue', edgecolor='black')
plt.title("Distribution of Mean Signal Values")
plt.xlabel("Mean Signal Amplitude")
plt.ylabel("Frequency")
plt.show()

# -----------------------------
# PART 3: ML Model - 1D CNN for ECG Classification
# -----------------------------
# Reshape data for CNN input: (samples, timesteps, channels)
# Guarded so that re-running this section (e.g. in a notebook) does not keep
# stacking extra trailing axes onto an already-reshaped X.
if X.ndim == 2:
    X = X[..., np.newaxis]  # add channel dimension

# Split data into train and test sets; stratify keeps the class ratio equal in both
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
print("Training samples:", X_train.shape[0], "Testing samples:", X_test.shape[0])

# Define a simple 1D CNN model.
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` layer argument, which newer Keras versions deprecate for
# Sequential models.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1], 1)),
    Conv1D(filters=16, kernel_size=5, activation='relu'),
    MaxPooling1D(pool_size=2),
    Conv1D(filters=32, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.5),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),  # Binary classification: Normal vs Arrhythmia
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Fit the model; stop once validation loss has not improved for 5 epochs
# and roll back to the best weights seen.
es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=32,
    validation_split=0.2,
    callbacks=[es],
)

# Plot training history: loss on the left, accuracy on the right
plt.figure(figsize=(10, 4))
for panel, (metric, pretty) in enumerate((('loss', 'Loss'), ('accuracy', 'Accuracy')), start=1):
    plt.subplot(1, 2, panel)
    plt.plot(history.history[metric], label=f'Train {pretty}', marker='o')
    plt.plot(history.history[f'val_{metric}'], label=f'Val {pretty}', marker='o')
    plt.title(f"Model {pretty}")
    plt.xlabel("Epoch")
    plt.ylabel(pretty)
    plt.legend()

plt.tight_layout()
plt.show()

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Predict and display classification report.
# The sigmoid output has shape (n, 1); threshold at 0.5 and flatten to (n,)
# so the predictions have the same shape as y_test.
y_pred = (model.predict(X_test) > 0.5).astype(int).ravel()
print(classification_report(
    y_test,
    y_pred,
    labels=[0, 1],                         # keep both rows even if one class is never predicted
    target_names=['Normal', 'Arrhythmia'],
    zero_division=0,                       # report 0 instead of warning when a class has no predictions
))
