In [25]:
import numpy as np
from sklearn.datasets import load_iris, load_wine, load_breast_cancer, load_digits, fetch_lfw_people, fetch_olivetti_faces #### add any other dataset
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import time

#----------------------------
# Quantum kernel helpers :
def prepare_target_dim(n_qubits):
    """Return the size of the simulated Hilbert space for ``n_qubits`` qubits.

    A register of n qubits has 2**n basis states, so this is the length of
    the amplitude vector that the classical data is later encoded into.
    """
    hilbert_dim = 2 ** n_qubits
    return hilbert_dim

def fit_pca_if_needed(X_train, target_dim):
    """Fit a PCA on the training data when it has more features than fit.

    Dimensionality reduction is only required when the number of features
    exceeds ``target_dim`` (2**n_qubits amplitudes are not enough to hold
    every feature).

    Args:
        X_train: 2-D training matrix, one sample per row.
        target_dim: Maximum number of features the encoding can hold.

    Returns:
        A fitted ``PCA`` instance, or ``None`` when the data already fits
        and no reduction is needed.
    """
    n_samples = X_train.shape[0]
    n_features = X_train.shape[1]
    if n_features <= target_dim:
        return None

    # PCA cannot extract more components than there are training samples.
    # Asking for ``target_dim`` components on a small fold would raise, so
    # clamp the request; amplitude_encode zero-pads the projected data back
    # up to 2**n_qubits columns.
    n_components = min(target_dim, n_samples)
    pca = PCA(n_components=n_components, random_state=0)
    pca.fit(X_train)
    return pca

def amplitude_encode(X, n_qubits, pca=None):
    """Map classical feature rows onto unit-norm amplitude vectors.

    Mimics amplitude encoding of a quantum state: every row of ``X`` becomes
    a vector of length 2**n_qubits with Euclidean norm 1.

    Args:
        X: 2-D array, one sample per row. It is copied, never modified.
        n_qubits: Number of simulated qubits; the output has 2**n_qubits
            columns.
        pca: Optional fitted transformer whose ``transform`` is applied
            before the width is adjusted.

    Returns:
        Array of shape (n_samples, 2**n_qubits). Rows that are all zero
        stay all zero, because their norm is replaced by 1e-12 before the
        division.
    """
    dim = 2 ** n_qubits
    features = X.copy() if pca is None else pca.transform(X.copy())

    # Bring the feature width to exactly ``dim``: zero-pad on the right when
    # too narrow, drop trailing columns when too wide.
    width = features.shape[1]
    if width < dim:
        filler = np.zeros((features.shape[0], dim - width))
        features = np.hstack([features, filler])
    elif width > dim:
        features = features[:, :dim]

    row_norms = np.linalg.norm(features, axis=1, keepdims=True)
    # Guard against division by zero for all-zero rows.
    safe_norms = np.where(row_norms == 0, 1e-12, row_norms)
    return features / safe_norms

def quantum_kernel_matrix(amps1, amps2=None):
    """Return the squared-overlap kernel between two sets of amplitude vectors.

    Entry (i, j) is ``|<amps1[i], amps2[j]>|**2``, i.e. the squared magnitude
    of the inner product between row i of ``amps1`` and row j of ``amps2``.
    When ``amps2`` is omitted the kernel of ``amps1`` with itself is computed.
    """
    other = amps1 if amps2 is None else amps2
    overlaps = np.dot(amps1, other.T)
    magnitudes = np.abs(overlaps)
    return magnitudes ** 2

def run_cv(dataset_name="iris", n_qubits=3, n_splits=5):
    """Cross-validate a quantum-inspired kernel SVM against two baselines.

    For every stratified fold three classifiers are trained and scored:
    an SVM on the precomputed squared-overlap kernel of amplitude-encoded
    data, an RBF SVM on the standardized features, and a random forest on
    the raw features. Mean accuracy, its standard deviation and the mean
    time per fold are printed for each model.

    Args:
        dataset_name: One of "iris", "wine", "breast_cancer", "digits",
            "lfw" or "olivetti".
        n_qubits: Number of simulated qubits; data is encoded into
            2**n_qubits amplitudes.
        n_splits: Number of stratified cross-validation folds.

    Returns:
        Tuple ``(acc_q, acc_rbf, acc_rf, time_q, time_rbf, time_rf)`` of
        per-fold lists: accuracies and elapsed seconds for the quantum-kernel
        SVM, the RBF SVM and the random forest respectively.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
    """
    # Load dataset//// add any other dataset here
    if dataset_name == "iris":
        data = load_iris()
    elif dataset_name == "wine":
        data = load_wine()
    elif dataset_name == "breast_cancer":
        data = load_breast_cancer()
    elif dataset_name == "digits":
        data = load_digits()
    elif dataset_name == "lfw":
        data = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
    elif dataset_name == "olivetti":
        data = fetch_olivetti_faces()
    else:
        raise ValueError("Dataset not supported")

    X = data.data
    y = data.target

    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    # The encoding dimension depends only on n_qubits, so compute it once.
    D = prepare_target_dim(n_qubits)

    acc_q_list, acc_rbf_list, acc_rf_list = [], [], []
    time_q_list, time_rbf_list, time_rf_list = [], [], []

    for train_idx, test_idx in skf.split(X, y):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # Scaling: statistics come from the training fold only, so nothing
        # from the test fold leaks into preprocessing.
        scaler = StandardScaler()
        X_train_s = scaler.fit_transform(X_train)
        X_test_s = scaler.transform(X_test)

        # Quantum amplitude encoding (other encodings, e.g. angle encoding,
        # could be plugged in here). PCA is likewise fitted on the training
        # fold only.
        pca = fit_pca_if_needed(X_train_s, D)
        amps_train = amplitude_encode(X_train_s, n_qubits, pca=pca)
        amps_test = amplitude_encode(X_test_s, n_qubits, pca=pca)

        # ---- Quantum-inspired SVM ----
        # The timed region covers kernel construction, fit and predict;
        # scaling, PCA and encoding above are not included.
        # perf_counter is a monotonic high-resolution clock, which suits
        # these short intervals better than the wall clock.
        start_q = time.perf_counter()
        K_train = quantum_kernel_matrix(amps_train)
        K_test = quantum_kernel_matrix(amps_test, amps_train)
        clf_q = SVC(kernel="precomputed", C=1.0)
        clf_q.fit(K_train, y_train)
        y_pred_q = clf_q.predict(K_test)
        end_q = time.perf_counter()
        acc_q_list.append(accuracy_score(y_test, y_pred_q))
        time_q_list.append(end_q - start_q)

        # ---- RBF SVM baseline ----
        start_rbf = time.perf_counter()
        clf_rbf = SVC(kernel="rbf", C=1.0, gamma="scale")
        clf_rbf.fit(X_train_s, y_train)
        y_pred_rbf = clf_rbf.predict(X_test_s)
        end_rbf = time.perf_counter()
        acc_rbf_list.append(accuracy_score(y_test, y_pred_rbf))
        time_rbf_list.append(end_rbf - start_rbf)

        # ---- Random forest baseline ----
        # Trained on the unscaled features. Seeded so repeated runs give the
        # same baseline, matching the seeded folds above.
        start_rf = time.perf_counter()
        clf_rf = RandomForestClassifier(random_state=42)
        clf_rf.fit(X_train, y_train)
        y_pred_rf = clf_rf.predict(X_test)
        end_rf = time.perf_counter()
        acc_rf_list.append(accuracy_score(y_test, y_pred_rf))
        time_rf_list.append(end_rf - start_rf)

    ###Results:
    print(f"{dataset_name.capitalize()} dataset → Quantum-kernel SVM: {np.mean(acc_q_list):.4f} ± {np.std(acc_q_list):.4f}, Time: {np.mean(time_q_list):.2f}s")
    print(f"{dataset_name.capitalize()} dataset → RBF SVM baseline: {np.mean(acc_rbf_list):.4f} ± {np.std(acc_rbf_list):.4f}, Time: {np.mean(time_rbf_list):.2f}s")
    print(f"{dataset_name.capitalize()} dataset → Random Forest:      {np.mean(acc_rf_list):.4f} ± {np.std(acc_rf_list):.4f}, Time: {np.mean(time_rf_list):.2f}s\n")

    return acc_q_list, acc_rbf_list, acc_rf_list, time_q_list, time_rbf_list, time_rf_list



#############
if __name__ == "__main__":
    # Each dataset is paired with the number of simulated qubits used for
    # its amplitude encoding; the experiments run in the order listed.
    experiments = [
        ("iris", 2),
        ("wine", 4),
        ("breast_cancer", 4),
        ("digits", 6),
        ("lfw", 11),
        ("olivetti", 7),
    ]
    for dataset, qubits in experiments:
        run_cv(dataset, n_qubits=qubits)


Iris dataset → Quantum-kernel SVM: 0.6800 ± 0.1204, Time: 0.00s
Iris dataset → RBF SVM baseline: 0.9600 ± 0.0389, Time: 0.00s
Iris dataset → Random Forest:      0.9533 ± 0.0340, Time: 0.16s

Wine dataset → Quantum-kernel SVM: 0.9046 ± 0.0222, Time: 0.00s
Wine dataset → RBF SVM baseline: 0.9833 ± 0.0222, Time: 0.00s
Wine dataset → Random Forest:      0.9887 ± 0.0138, Time: 0.20s

Breast_cancer dataset → Quantum-kernel SVM: 0.7996 ± 0.0179, Time: 0.01s
Breast_cancer dataset → RBF SVM baseline: 0.9772 ± 0.0163, Time: 0.01s
Breast_cancer dataset → Random Forest:      0.9526 ± 0.0180, Time: 0.32s

Digits dataset → Quantum-kernel SVM: 0.9783 ± 0.0087, Time: 0.13s
Digits dataset → RBF SVM baseline: 0.9839 ± 0.0060, Time: 0.13s
Digits dataset → Random Forest:      0.9750 ± 0.0047, Time: 0.57s

Lfw dataset → Quantum-kernel SVM: 0.5939 ± 0.0202, Time: 0.25s
Lfw dataset → RBF SVM baseline: 0.7523 ± 0.0199, Time: 1.81s
Lfw dataset → Random Forest:      0.6266 ± 0.0138, Time: 5.06s

Olivetti datase