In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from qiskit.utils import QuantumInstance
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.kernels import QuantumKernel
from qiskit_machine_learning.algorithms import QSVC
from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from qiskit import Aer
import joblib
import warnings
warnings.filterwarnings("ignore")

In [None]:
# ------------------------------------------------------------------ #
#   Experiment settings: edit here, then re-run the cells below      #
# ------------------------------------------------------------------ #

# Number of PCA components to keep. The same value is later passed as the
# feature-map dimension. Use -1 to keep every available feature.
NUM_FEATURES = 15

# Number of samples drawn for the training split. Use -1 to skip
# downsampling and train on every available sample.
TRAIN_SIZE = 7_000

# Value handed to the feature map's `reps` argument.
CIRCUIT_REPS = 2

# The test split holds int(TEST_RATIO * TRAIN_SIZE) samples, i.e. it is sized
# relative to TRAIN_SIZE rather than to the full dataset.
TEST_RATIO = 0.25

In [None]:
# ------------------------------------------------ #
#   Read the train/test workbooks and pool them    #
# ------------------------------------------------ #
print("Loading angle-encoded training and testing datasets...")
train_df, test_df = (
    pd.read_excel(workbook)
    for workbook in (
        "/content/tii_ssrc23_angle_encoded_train.xlsx",
        "/content/tii_ssrc23_angle_encoded_test.xlsx",
    )
)

# Both files are stacked into one frame with a fresh 0..n-1 index; the
# train/test partition is re-drawn from this pooled frame further down.
full_df = pd.concat((train_df, test_df), ignore_index=True)

# Target column on one side, every remaining column as features on the other.
y_full = full_df["label"]
X_full = full_df.drop(columns="label")

print(f"Total Samples: {len(X_full)}, Total Classes: {len(np.unique(y_full))}")

In [None]:
# ============================ #
#   Stratified Downsampling    #
# ============================ #
# Draws a class-stratified subsample from the pooled data: TRAIN_SIZE rows for
# training plus int(TEST_RATIO * TRAIN_SIZE) disjoint rows for testing.
num_classes = len(np.unique(y_full))

if TRAIN_SIZE == -1 or TRAIN_SIZE >= len(X_full):
    # Every row goes to training, so nothing is left for evaluation: the test
    # containers are created empty (same columns as the feature frame).
    print("Using full dataset (no downsampling)")
    X_train = X_full
    y_train = y_full
    X_test = pd.DataFrame([], columns=X_full.columns)
    y_test = pd.Series([], dtype=int)
else:
    TEST_SIZE = int(TEST_RATIO * TRAIN_SIZE)
    if TEST_SIZE < num_classes:
        # The test split needs at least one sample per class.
        if TEST_RATIO <= 0:
            # No TRAIN_SIZE can help when the ratio itself yields no test rows.
            hint = "Set TEST_RATIO to a positive fraction."
        else:
            # Smallest TRAIN_SIZE for which int(TEST_RATIO * TRAIN_SIZE) reaches
            # num_classes. The loop absorbs float rounding in that product.
            min_train = int(np.ceil(num_classes / TEST_RATIO))
            while int(TEST_RATIO * min_train) < num_classes:
                min_train += 1
            hint = f"Set TRAIN_SIZE to at least {min_train}."
        raise ValueError(
            f"TRAIN_SIZE={TRAIN_SIZE} too small for {num_classes} classes with {int(TEST_RATIO*100)}% test split.\n"
            + hint
        )

    # n_splits=1, so the splitter yields exactly one (train, test) index pair.
    sss = StratifiedShuffleSplit(n_splits=1, train_size=TRAIN_SIZE, test_size=TEST_SIZE, random_state=42)
    train_index, test_index = next(sss.split(X_full, y_full))
    X_train = X_full.iloc[train_index]
    y_train = y_full.iloc[train_index]
    X_test = X_full.iloc[test_index]
    y_test = y_full.iloc[test_index]

# ============================ #
#     Feature Selection (PCA)  #
# ============================ #
# Standardizes the features, then projects them onto NUM_FEATURES principal
# components. Both transforms are fitted on the training split only.

# PCA cannot return more components than min(n_samples, n_features), so the
# requested count is capped there; -1 means "as many as possible".
max_components = min(X_train.shape)
if NUM_FEATURES == -1 or NUM_FEATURES > max_components:
    NUM_FEATURES = max_components

print(f"\nApplying PCA to reduce to {NUM_FEATURES} features...")

# Standardize before PCA
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

pca = PCA(n_components=NUM_FEATURES, random_state=42)
X_train_pca = pca.fit_transform(X_train_scaled)

if len(X_test) > 0:
    X_test_scaled = scaler.transform(X_test)
    X_test_pca = pca.transform(X_test_scaled)
else:
    # The "use full dataset" path leaves the test split empty, and the
    # scikit-learn transformers reject 0-sample input. Produce correctly
    # shaped empty arrays instead of crashing.
    X_test_scaled = np.empty((0, X_train.shape[1]))
    X_test_pca = np.empty((0, NUM_FEATURES))

print(f"\nQSVC with {NUM_FEATURES} qubits, {len(X_train_pca)} training samples, {len(X_test_pca)} test samples")

In [None]:
# -------------------------------------------- #
#   QSVC (Quantum SVM): quantum kernel setup   #
# -------------------------------------------- #
# Feature map: one input dimension per retained PCA component, repeated
# CIRCUIT_REPS times, with 'linear' entanglement.
feature_map = ZZFeatureMap(
    feature_dimension=NUM_FEATURES,
    reps=CIRCUIT_REPS,
    entanglement="linear",
)

# Simulator backend wrapped in a QuantumInstance with fixed seeds.
# NOTE(review): shots=1024 is combined with a statevector backend here;
# confirm whether the shot count is actually honoured for this backend.
backend = Aer.get_backend("aer_simulator_statevector")
qi = QuantumInstance(
    backend=backend,
    shots=1024,
    seed_simulator=42,
    seed_transpiler=42,
)

# Kernel built from the feature map and evaluated through the instance above.
quantum_kernel = QuantumKernel(
    feature_map=feature_map,
    quantum_instance=qi,
)