# 03 – Quantum Model Training

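Loads `X_selected.csv` and `y.csv`, scales the features, balances the training set with **BorderlineSMOTE**, reduces it to **6 PCA components (one per qubit)**, trains a variational quantum classifier (**QVC**, Qiskit's `VQC`) and a quantum-kernel SVM (**QSVM**, Qiskit's `QSVC`), and pickles both models together with the preprocessing pipeline.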

**Inputs**  
- `X_selected.csv`  
- `y.csv`  
- `selected_features.pkl`

**Outputs**  
- `qvc_model.pkl`  
- `qsvm_model.pkl`  
- `preprocessing_objects.pkl`

In [None]:
# Install the Qiskit stack at pinned versions (qiskit, qiskit-aer,
# qiskit-machine-learning, qiskit-algorithms) plus imbalanced-learn;
# -q keeps pip's output quiet.
# NOTE(review): imbalanced-learn is the only package here without a version pin.
!pip install -q "qiskit==1.2.4" "qiskit-aer==0.15.1" "qiskit-machine-learning==0.7.2" "qiskit-algorithms==0.3.0" imbalanced-learn

In [None]:
import pandas as pd, numpy as np, pickle, warnings, datetime
# Ignore every warning raised from here on (no category or module filter is given).
# NOTE(review): this also hides deprecation warnings from the libraries imported below.
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from sklearn.decomposition import PCA
from imblearn.over_sampling import BorderlineSMOTE

from qiskit.circuit.library import ZZFeatureMap, TwoLocal
from qiskit_algorithms.optimizers import COBYLA
from qiskit.primitives import Sampler
from qiskit_aer import AerSimulator
from qiskit_machine_learning.algorithms import VQC
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_machine_learning.algorithms import QSVC

# Print a start timestamp so the run's duration can be read off the cell output.
start_stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"Start: {start_stamp}")

## 1. Load selected data

In [None]:
# Load the feature matrix, the target vector and the pickled list of selected
# features from the project folder (presumably written by an earlier notebook
# in this series -- confirm).
base = '/content/drive/MyDrive/QuantumBoost2025/'
X = pd.read_csv(base + 'X_selected.csv')

# Squeeze along the column axis only: a bare squeeze() would also collapse the
# row axis and turn a one-row file into a scalar instead of a Series.
y = pd.read_csv(base + 'y.csv').squeeze(axis='columns')

# Fail early with a readable message instead of a confusing error further
# down the pipeline (the stratified split and the classifiers need 1-D labels
# aligned row-for-row with X).
if y.ndim != 1:
    raise ValueError(
        f"y.csv must contain exactly one column, got shape {y.shape}")
if len(y) != len(X):
    raise ValueError(
        f"X_selected.csv has {len(X)} rows but y.csv has {len(y)}")

# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load artefacts that this project wrote itself.
with open(base + 'selected_features.pkl', 'rb') as f:
    selected_features = pickle.load(f)
print(f"X shape: {X.shape}, selected: {len(selected_features)}")

## 2. Train-test split & scaling

In [None]:
# Hold out 20 % of the rows as a test set, keeping the class proportions of y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# Stage 1: robust scaling, with statistics learned from the training split only.
robust = RobustScaler().fit(X_train)
X_train_r, X_test_r = (robust.transform(part) for part in (X_train, X_test))

# Stage 2: map the robust-scaled training range onto [0, 2*pi]; the test split
# reuses the range learned from the training split.
mm = MinMaxScaler(feature_range=(0, 2*np.pi)).fit(X_train_r)
X_train_q, X_test_q = (mm.transform(part) for part in (X_train_r, X_test_r))

## 3. BorderlineSMOTE + subset for quantum

In [None]:
# Oversample the scaled training split with BorderlineSMOTE; the test split is
# not resampled.
sm = BorderlineSMOTE(random_state=42, k_neighbors=5, kind='borderline-1')
X_res, y_res = sm.fit_resample(X_train_q, y_train)

# Work on plain ndarrays from here on. y_train is a pandas Series, and
# fit_resample may hand y_res back in the same container; indexing a Series
# with `y_res[tr]` is label-based and leaves y_q with a shuffled integer
# index, so later positional-looking access such as `y[0]` inside an
# estimator would become a label lookup and can raise KeyError.
X_res = np.asarray(X_res)
y_res = np.asarray(y_res)

# Draw one stratified 70 % subset of the resampled data for quantum training.
sss = StratifiedShuffleSplit(n_splits=1, train_size=0.7, random_state=42)
tr, _ = next(sss.split(X_res, y_res))  # n_splits=1, so take the single split
X_q = X_res[tr]
y_q = y_res[tr]
print(f"Quantum training subset: {X_q.shape}")

## 4. PCA → 6 qubits

In [None]:
# One principal component per qubit.
n_qubits = 6

# Fit the projection on the quantum training subset and apply it to the test split.
pca = PCA(random_state=42, n_components=n_qubits)
train_components = pca.fit_transform(X_q)
test_components = pca.transform(X_test_q)

# Rescale the components onto [0, 2*pi]; the range is learned from the
# training components and reused for the test components.
pca_scaler = MinMaxScaler(feature_range=(0, 2*np.pi)).fit(train_components)
X_train_pca = pca_scaler.transform(train_components)
X_test_pca = pca_scaler.transform(test_components)

explained = pca.explained_variance_ratio_.sum()
print(f"Explained variance: {explained:.4f}")

## 5. Quantum circuits

In [None]:
# Data-encoding circuit: ZZ feature map over n_qubits features, 3 repetitions,
# fully entangled.
feature_map = ZZFeatureMap(
    feature_dimension=n_qubits,
    entanglement='full',
    reps=3,
)

# Trainable circuit: RY+RZ rotation layers with CX entanglers, 4 repetitions,
# fully entangled.
ansatz = TwoLocal(
    num_qubits=n_qubits,
    rotation_blocks=['ry', 'rz'],
    entanglement_blocks='cx',
    entanglement='full',
    reps=4,
)

## 6. QVC training

In [None]:
# Sampler primitive and a COBYLA optimiser capped at 50 iterations.
sampler = Sampler()
optimizer = COBYLA(maxiter=50)

# Variational classifier built from the feature map and ansatz defined earlier.
qvc = VQC(
    feature_map=feature_map,
    ansatz=ansatz,
    optimizer=optimizer,
    sampler=sampler,
)

# Train on the first 500 rows only (limit for demo speed).
demo_rows = slice(500)
qvc.fit(X_train_pca[demo_rows], y_q[demo_rows])
print("QVC trained")

## 7. QSVM training

In [None]:
# Fidelity-based quantum kernel over the same feature map, plugged into QSVC.
kernel = FidelityQuantumKernel(feature_map=feature_map)
qsvm = QSVC(quantum_kernel=kernel)

# Train on the first 500 rows of the PCA-reduced training subset.
kernel_rows = slice(500)
qsvm.fit(X_train_pca[kernel_rows], y_q[kernel_rows])
print("QSVM trained")

## 8. Save everything

In [None]:
# Destination folder for every pickled artefact.
out = '/content/drive/MyDrive/QuantumBoost2025/'

# Fitted preprocessing steps, keyed by name, plus the selected feature list.
prep = {
    'robust': robust,
    'minmax': mm,
    'pca': pca,
    'pca_scaler': pca_scaler,
    'features': selected_features,
}

# File name -> object; written in the order models first, preprocessing last.
artefacts = {
    'qvc_model.pkl': qvc,
    'qsvm_model.pkl': qsvm,
    'preprocessing_objects.pkl': prep,
}
for filename, payload in artefacts.items():
    with open(out + filename, 'wb') as f:
        pickle.dump(payload, f)
print("All artefacts saved.")