In [None]:
# ------------------------------------------------------------------------------
# Quantum Support Vector Machine (QSVM) for Quantum Circuit Trojan Detection
# ------------------------------------------------------------------------------

# This notebook trains Quantum Support Vector Machines (QSVMs) using fidelity-based
# quantum kernels to classify quantum circuits as either clean or malicious.

# Dataset:
# - 7 Quantum Algorithms: Deutsch-Jozsa, QAOA, QFT, Shor, Grover, BV, and Universal
# - Each dataset contains 10 clean and 10 malicious variants (balanced class labels)
# - Features include: gate depth, gate counts, entropy, success rate, and circuit output states

# Objective:
# - Evaluate performance of QSVMs using fidelity quantum kernels
# - Benchmark classification accuracy, ROC AUC, and confusion matrices
# - Compare per-algorithm and universal performance

# Quantum Kernel:
# - FidelityQuantumKernel from Qiskit Machine Learning
# - Feature Map: ZZFeatureMap with 2 reps and linear entanglement
# - Uses inner product between circuit quantum states

# Outputs:
# - Confusion matrix plots
# - ROC curve plots
# - Classification reports with accuracy and AUC metrics
# - Saved per-algorithm in dedicated `qsvm_outputs/` subfolders

# Notes:
# - Uses sklearn's `SVC` with precomputed quantum kernels
# - Warnings for undefined precision are suppressed using `zero_division=0`
# - Universal dataset combines all six algorithm datasets

# Author: Zeeshan Ajmal



In [20]:
# --------------------------------------------------
# Import Libraries (Qiskit + ML + Utils)
# --------------------------------------------------

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# ML Core
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.preprocessing import StandardScaler

# Qiskit + Quantum ML
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.kernels import FidelityQuantumKernel

# Plot style
sns.set(style='whitegrid')


In [39]:
# ------------------------------------------------------------
# Merge All Algorithm Datasets into Universal Dataset (CSV)
# ------------------------------------------------------------

import pandas as pd
import os

# List of input files
csv_files = [
    "dj_full_dataset.csv",
    "qaoa_full_dataset.csv",
    "qft_full_dataset.csv",
    "shor_full_dataset.csv",
    "grover_full_dataset.csv",
    "bv_full_dataset.csv"
]

# Load all datasets and record full set of unique columns
all_dfs = [pd.read_csv(file) for file in csv_files]
all_columns = set()
for df in all_dfs:
    all_columns.update(df.columns)

# Convert to sorted list to ensure consistent column order
all_columns = sorted(all_columns)

# Normalize all datasets to full set of columns (fill missing with 0)
normalized_dfs = []
for i, df in enumerate(all_dfs):
    df_normalized = df.reindex(columns=all_columns, fill_value=0)
    normalized_dfs.append(df_normalized)

# Combine all datasets into one
df_universal = pd.concat(normalized_dfs, ignore_index=True)
df_universal.sort_values(by="name", inplace=True)

# Save to CSV
df_universal.to_csv("universal_dataset.csv", index=False)

# Preview
print("✅ Universal dataset created successfully!")
print("🔢 Shape:", df_universal.shape)
print("🧬 Columns:", list(df_universal.columns))


✅ Universal dataset created successfully!
🔢 Shape: (120, 15)
🧬 Columns: ['ccx', 'cp', 'cx', 'depth', 'entropy', 'h', 'label', 'name', 'rx', 'rz', 'success_rate', 'swap', 'total_gates', 'unique_states', 'x']


In [42]:
def train_qsvm_on_dataset(csv_file, algo_name, save_folder):

    os.makedirs(save_folder, exist_ok=True)

    # Load and clean dataset
    df = pd.read_csv(csv_file)

    # Convert all columns to numeric where applicable
    for col in df.columns:
        if col not in ['name', 'label']:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    # Drop any rows with NaN values after conversion
    df.dropna(inplace=True)
    df.reset_index(drop=True, inplace=True)

    # Extract features and labels
    X = df.drop(columns=["name", "label"])
    y = df["label"]

    # Check class balance
    print(f"📂 Dataset: {csv_file}")
    print("Class Distribution:\n", y.value_counts())

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=42
    )

    # Convert to lists
    X_train_list = X_train.values.tolist()
    X_test_list = X_test.values.tolist()

    # Quantum kernel setup
    feature_map = ZZFeatureMap(feature_dimension=X.shape[1], reps=2, entanglement='linear')
    quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)
    kernel_train = quantum_kernel.evaluate(X_train_list, X_train_list)
    kernel_test = quantum_kernel.evaluate(X_test_list, X_train_list)

    # Train QSVM
    clf = SVC(kernel='precomputed', probability=True)
    clf.fit(kernel_train, y_train)
    y_pred = clf.predict(kernel_test)
    y_proba = clf.predict_proba(kernel_test)[:, 1]

    # Metrics
    acc = accuracy_score(y_test, y_pred)
    roc = roc_auc_score(y_test, y_proba)
    report = classification_report(y_test, y_pred, target_names=["Clean", "Malicious"], zero_division=0)


    # Save metrics
    with open(f"{save_folder}/classification_report.txt", "w") as f:
        f.write(f"{algo_name} - QSVM\n")
        f.write(f"Accuracy: {acc:.2f}\nROC AUC: {roc:.2f}\n\n")
        f.write(report)

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure()
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=["Clean", "Malicious"],
                yticklabels=["Clean", "Malicious"])
    plt.title(f"Confusion Matrix – {algo_name}")
    plt.tight_layout()
    plt.savefig(f"{save_folder}/confusion_matrix.png", dpi=300)
    plt.close()

    # ROC Curve
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    plt.figure()
    plt.plot(fpr, tpr, label=f"AUC = {roc:.2f}")
    plt.plot([0, 1], [0, 1], linestyle='--', color='gray')
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title(f"ROC Curve – {algo_name}")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f"{save_folder}/roc_curve.png", dpi=300)
    plt.close()

    print(f"✅ {algo_name}: Accuracy={acc:.2f}, ROC AUC={roc:.2f}")
    return acc, roc


In [None]:
train_qsvm_on_dataset("dj_full_dataset.csv", "Deutsch-Jozsa (QSVM)", "qsvm_outputs/dj")
train_qsvm_on_dataset("qaoa_full_dataset.csv", "QAOA (QSVM)", "qsvm_outputs/qaoa")
train_qsvm_on_dataset("qft_full_dataset.csv", "QFT (QSVM)", "qsvm_outputs/qft")
train_qsvm_on_dataset("shor_full_dataset.csv", "Shor (QSVM)", "qsvm_outputs/shor")
train_qsvm_on_dataset("grover_full_dataset.csv", "Grover (QSVM)", "qsvm_outputs/grover")
train_qsvm_on_dataset("bv_full_dataset.csv", "BV (QSVM)", "qsvm_outputs/bv")
train_qsvm_on_dataset("universal_dataset.csv", "Universal (QSVM)", "qsvm_outputs/universal")


📂 Dataset: dj_full_dataset.csv
Class Distribution:
 label
0    10
1    10
Name: count, dtype: int64
✅ Deutsch-Jozsa (QSVM): Accuracy=0.75, ROC AUC=0.75
📂 Dataset: qaoa_full_dataset.csv
Class Distribution:
 label
0    10
1    10
Name: count, dtype: int64
✅ QAOA (QSVM): Accuracy=1.00, ROC AUC=1.00
📂 Dataset: qft_full_dataset.csv
Class Distribution:
 label
0    10
1    10
Name: count, dtype: int64
✅ QFT (QSVM): Accuracy=1.00, ROC AUC=1.00
📂 Dataset: shor_full_dataset.csv
Class Distribution:
 label
0    10
1    10
Name: count, dtype: int64
✅ Shor (QSVM): Accuracy=1.00, ROC AUC=1.00
📂 Dataset: grover_full_dataset.csv
Class Distribution:
 label
0    10
1    10
Name: count, dtype: int64
✅ Grover (QSVM): Accuracy=0.50, ROC AUC=0.00
📂 Dataset: bv_full_dataset.csv
Class Distribution:
 label
0    10
1    10
Name: count, dtype: int64
✅ BV (QSVM): Accuracy=0.75, ROC AUC=1.00
📂 Dataset: universal_dataset.csv
Class Distribution:
 label
0    60
1    60
Name: count, dtype: int64
