In [1]:
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from sklearn.decomposition import PCA

df = pd.read_csv('pd_speech_features.csv', header=1)


selector = VarianceThreshold(threshold=0.1)
selected_features = selector.fit_transform(df.drop(columns=['id', 'gender', 'class']))

scaler = StandardScaler()
scaled_features = scaler.fit_transform(selected_features)

pca_scaled = PCA(n_components=150, svd_solver='full')
pca_scaled.fit(scaled_features)

variance_scaled = pca_scaled.explained_variance_ratio_.cumsum()


transformed_scaled = pca_scaled.transform(scaled_features)

df_transformed_scaled = pd.DataFrame(data=transformed_scaled, columns=[f"PC{i+1}" for i in range(transformed_scaled.shape[1])])


X, y = pd.concat([df[["gender"]], df_transformed_scaled], axis=1), df["class"]
y_scaled = 2 * (y - 0.5)

In [2]:
import pennylane as qml

nqubits = 6
dev = qml.device("lightning.qubit", wires=nqubits)

@qml.qnode(dev)
def kernel_circ(a, b):
    qml.AmplitudeEmbedding(
        a, wires=range(nqubits), pad_with=0, normalize=True)
    qml.adjoint(qml.AmplitudeEmbedding(
        b, wires=range(nqubits), pad_with=0, normalize=True))
    return qml.probs(wires=range(nqubits))


from sklearn.svm import SVC
def qkernel(A, B):
    K = np.zeros((len(A), len(B)))
    for i, a in enumerate(A):
        print(f"Row {i+1}/{len(A)}...") #logging
        for j, b in enumerate(B):
            if j<i:
                K[i, j] = K[j, i]
                continue
            K[i, j] = kernel_circ(a, b)[0]
        print(f"  Finished row {i+1}")
    print("\t\tKERNEL\t\t\n\n")
    print(K)
    print("\n\n")
    #save kernel to file
    np.save("quantum_kernel.npy", K)
    return K


In [3]:
svm = SVC(kernel=qkernel, random_state=15)

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y_scaled, test_size=0.2, random_state=15)
X_train, y_train = X_train.to_numpy(), y_train.to_numpy()
X_test, y_test = X_test.to_numpy(), y_test.to_numpy()

In [5]:
svm.fit(X_train[:, :64], y_train)

Row 1/604...
  Finished row 1
Row 2/604...
  Finished row 2
Row 3/604...
  Finished row 3
Row 4/604...
  Finished row 4
Row 5/604...
  Finished row 5
Row 6/604...
  Finished row 6
Row 7/604...
  Finished row 7
Row 8/604...
  Finished row 8
Row 9/604...
  Finished row 9
Row 10/604...
  Finished row 10
Row 11/604...
  Finished row 11
Row 12/604...
  Finished row 12
Row 13/604...
  Finished row 13
Row 14/604...
  Finished row 14
Row 15/604...
  Finished row 15
Row 16/604...
  Finished row 16
Row 17/604...
  Finished row 17
Row 18/604...
  Finished row 18
Row 19/604...
  Finished row 19
Row 20/604...
  Finished row 20
Row 21/604...
  Finished row 21
Row 22/604...
  Finished row 22
Row 23/604...
  Finished row 23
Row 24/604...
  Finished row 24
Row 25/604...
  Finished row 25
Row 26/604...
  Finished row 26
Row 27/604...
  Finished row 27
Row 28/604...
  Finished row 28
Row 29/604...
  Finished row 29
Row 30/604...
  Finished row 30
Row 31/604...
  Finished row 31
Row 32/604...
  Finished r

0,1,2
,C,1.0
,kernel,<function qke...x7c4851e83400>
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [7]:
predictions = svm.predict(X_test[:, :64]) 

Row 1/152...
  Finished row 1
Row 2/152...
  Finished row 2
Row 3/152...
  Finished row 3
Row 4/152...
  Finished row 4
Row 5/152...
  Finished row 5
Row 6/152...
  Finished row 6
Row 7/152...
  Finished row 7
Row 8/152...
  Finished row 8
Row 9/152...
  Finished row 9
Row 10/152...
  Finished row 10
Row 11/152...
  Finished row 11
Row 12/152...
  Finished row 12
Row 13/152...
  Finished row 13
Row 14/152...
  Finished row 14
Row 15/152...
  Finished row 15
Row 16/152...
  Finished row 16
Row 17/152...
  Finished row 17
Row 18/152...
  Finished row 18
Row 19/152...
  Finished row 19
Row 20/152...
  Finished row 20
Row 21/152...
  Finished row 21
Row 22/152...
  Finished row 22
Row 23/152...
  Finished row 23
Row 24/152...
  Finished row 24
Row 25/152...
  Finished row 25
Row 26/152...
  Finished row 26
Row 27/152...
  Finished row 27
Row 28/152...
  Finished row 28
Row 29/152...
  Finished row 29
Row 30/152...
  Finished row 30
Row 31/152...
  Finished row 31
Row 32/152...
  Finished r

In [8]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

[[23 12]
 [44 73]]
              precision    recall  f1-score   support

        -1.0       0.34      0.66      0.45        35
         1.0       0.86      0.62      0.72       117

    accuracy                           0.63       152
   macro avg       0.60      0.64      0.59       152
weighted avg       0.74      0.63      0.66       152



## Other Experiments

# Angle encoding

In [53]:
import pennylane as qml
from pennylane import AngleEmbedding

n_qubits = 8

dev_kernel = qml.device("lightning.qubit", wires=n_qubits)

projector = np.zeros((2 ** n_qubits, 2 ** n_qubits))
projector[0, 0] = 1

@qml.qnode(dev_kernel)
def kernel(x1, x2):
    """The quantum kernel."""
    AngleEmbedding(x1, wires=range(n_qubits))
    qml.adjoint(AngleEmbedding)(x2, wires=range(n_qubits))
    return qml.expval(qml.Hermitian(projector, wires=range(n_qubits)))

def kernel_matrix(A, B):
    """Compute the matrix whose entries are the kernel
    evaluated on pairwise data from sets A and B."""
    K = np.zeros((len(A), len(B)))
    for i, a in enumerate(A):
        print(f"Row {i+1}/{len(A)}...")
        for j, b in enumerate(B):
            if j<i:
                K[i, j] = K[j, i]
                continue
            K[i, j] = kernel(a, b)
    return K




In [54]:
from sklearn.svm import SVC
svm = SVC(kernel=kernel_matrix, random_state=15)

In [55]:
svm.fit(X_train[:, :8], y_train)

Row 1/453...
Row 2/453...
Row 3/453...
Row 4/453...
Row 5/453...
Row 6/453...
Row 7/453...
Row 8/453...
Row 9/453...
Row 10/453...
Row 11/453...
Row 12/453...
Row 13/453...
Row 14/453...
Row 15/453...
Row 16/453...
Row 17/453...
Row 18/453...
Row 19/453...
Row 20/453...
Row 21/453...
Row 22/453...
Row 23/453...
Row 24/453...
Row 25/453...
Row 26/453...
Row 27/453...
Row 28/453...
Row 29/453...
Row 30/453...
Row 31/453...
Row 32/453...
Row 33/453...
Row 34/453...
Row 35/453...
Row 36/453...
Row 37/453...
Row 38/453...
Row 39/453...
Row 40/453...
Row 41/453...
Row 42/453...
Row 43/453...
Row 44/453...
Row 45/453...
Row 46/453...
Row 47/453...
Row 48/453...
Row 49/453...
Row 50/453...
Row 51/453...
Row 52/453...
Row 53/453...
Row 54/453...
Row 55/453...
Row 56/453...
Row 57/453...
Row 58/453...
Row 59/453...
Row 60/453...
Row 61/453...
Row 62/453...
Row 63/453...
Row 64/453...
Row 65/453...
Row 66/453...
Row 67/453...
Row 68/453...
Row 69/453...
Row 70/453...
Row 71/453...
Row 72/453...
R

0,1,2
,C,1.0
,kernel,<function ker...x76cc9c681090>
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [58]:
angel_predictions = svm.predict(X_test[:, :8])

Row 1/152...
Row 2/152...
Row 3/152...
Row 4/152...
Row 5/152...
Row 6/152...
Row 7/152...
Row 8/152...
Row 9/152...
Row 10/152...
Row 11/152...
Row 12/152...
Row 13/152...
Row 14/152...
Row 15/152...
Row 16/152...
Row 17/152...
Row 18/152...
Row 19/152...
Row 20/152...
Row 21/152...
Row 22/152...
Row 23/152...
Row 24/152...
Row 25/152...
Row 26/152...
Row 27/152...
Row 28/152...
Row 29/152...
Row 30/152...
Row 31/152...
Row 32/152...
Row 33/152...
Row 34/152...
Row 35/152...
Row 36/152...
Row 37/152...
Row 38/152...
Row 39/152...
Row 40/152...
Row 41/152...
Row 42/152...
Row 43/152...
Row 44/152...
Row 45/152...
Row 46/152...
Row 47/152...
Row 48/152...
Row 49/152...
Row 50/152...
Row 51/152...
Row 52/152...
Row 53/152...
Row 54/152...
Row 55/152...
Row 56/152...
Row 57/152...
Row 58/152...
Row 59/152...
Row 60/152...
Row 61/152...
Row 62/152...
Row 63/152...
Row 64/152...
Row 65/152...
Row 66/152...
Row 67/152...
Row 68/152...
Row 69/152...
Row 70/152...
Row 71/152...
Row 72/152...
R

In [59]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, angel_predictions))
print(classification_report(y_test, angel_predictions))

[[  2  33]
 [  3 114]]
              precision    recall  f1-score   support

        -1.0       0.40      0.06      0.10        35
         1.0       0.78      0.97      0.86       117

    accuracy                           0.76       152
   macro avg       0.59      0.52      0.48       152
weighted avg       0.69      0.76      0.69       152



In [None]:
from sklearn.metrics import accuracy_score
print(accuracy_score(svm.predict(X_test[:, :6]), y_test))

0.5131578947368421


In [None]:
from sklearn.metrics import classification_report
print(classification_report(svm.predict(X_test[:, :6]), y_test))

              precision    recall  f1-score   support

        -1.0       0.09      0.30      0.13        10
         1.0       0.94      0.77      0.85       142

    accuracy                           0.74       152
   macro avg       0.51      0.54      0.49       152
weighted avg       0.88      0.74      0.80       152



In [None]:
pca_scaled.explained_variance_ratio_.cumsum()[:50]

array([0.22903833, 0.34432509, 0.44397438, 0.48226919, 0.51015981,
       0.5372165 , 0.56256489, 0.58392064, 0.60378196, 0.62320154,
       0.64176394, 0.65776482, 0.67330645, 0.68808137, 0.7016061 ,
       0.71392211, 0.72510308, 0.73549408, 0.74575494, 0.75552709,
       0.76485879, 0.77356183, 0.78204747, 0.7898924 , 0.7972759 ,
       0.80406404, 0.81071625, 0.81712266, 0.82289481, 0.82837587,
       0.83356297, 0.83857102, 0.84320304, 0.8477888 , 0.85233169,
       0.85662745, 0.86082281, 0.86495675, 0.86906397, 0.87298568,
       0.8768181 , 0.88035072, 0.88384063, 0.88715539, 0.89041125,
       0.89352168, 0.89661358, 0.89954863, 0.90244066, 0.90522866])

In [None]:
# classical benchmark

from sklearn.svm import SVC

model = SVC()

model.fit(X_train, y_train)

print(model.score(X_test, y_test))


0.875


In [None]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()

model.fit(X_train[:,:5], y_train)

print(model.score(X_test[:,:5], y_test))


0.8289473684210527


  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept


In [None]:
from sklearn.metrics import classification_report, confusion_matrix


y_pred = model.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 17  18]
 [  1 116]]
              precision    recall  f1-score   support

        -1.0       0.94      0.49      0.64        35
         1.0       0.87      0.99      0.92       117

    accuracy                           0.88       152
   macro avg       0.91      0.74      0.78       152
weighted avg       0.88      0.88      0.86       152



In [None]:
X_train

array([[ 1.00000000e+00, -9.00776449e+00, -1.46201812e+01, ...,
         1.17057764e-01, -1.53903436e-01, -3.28671148e-01],
       [ 1.00000000e+00, -3.44069601e+00, -2.08999290e+00, ...,
         1.40390946e-02,  1.70018320e-01,  1.48733504e-01],
       [ 1.00000000e+00, -1.35936202e+01, -1.57794468e+01, ...,
         6.09411547e-02, -3.68417570e-01,  1.78274746e-01],
       ...,
       [ 1.00000000e+00,  2.07258596e+00, -6.86171315e+00, ...,
        -9.80399371e-02, -8.17510739e-02,  2.21665445e-02],
       [ 0.00000000e+00, -3.05898917e+00,  5.89082629e+00, ...,
         1.61820350e-01,  3.53312687e-01,  1.53004761e-01],
       [ 0.00000000e+00,  3.83496411e+00, -1.02977401e+00, ...,
         6.85876540e-02, -8.47900587e-01, -1.66294452e-01]],
      shape=(453, 151))

In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y_scaled, test_size=0.2, random_state=15)
X_train, y_train = X_train.to_numpy(), y_train.to_numpy()
X_test, y_test = X_test.to_numpy(), y_test.to_numpy()

In [9]:
import optuna
from sklearn.svm import SVC
from sklearn.metrics import recall_score
def objective(trial):
    params = {
    'C': trial.suggest_float('C', 0.1, 100),
    'gamma': trial.suggest_categorical('gamma', ['scale', 'auto']),
    'kernel': trial.suggest_categorical('kernel', ['rbf', 'linear', 'poly']),
    "probability":False
    }
    model = SVC(**params, random_state=15)
    model.fit(X_train[:, :64], y_train)
    return recall_score(y_test, model.predict(X_test[:, :64]), pos_label=-1)
    # return model.score(X_test[:, :64], y_test)

study_baseline = optuna.create_study(direction="maximize")
study_baseline.optimize(objective, n_trials=100)


[I 2025-09-05 14:05:48,645] A new study created in memory with name: no-name-54f095fa-2ee3-4d25-a130-410237ec652c
[I 2025-09-05 14:05:48,671] Trial 0 finished with value: 0.4 and parameters: {'C': 0.7143109207960794, 'gamma': 'scale', 'kernel': 'poly'}. Best is trial 0 with value: 0.4.
[I 2025-09-05 14:05:48,686] Trial 1 finished with value: 0.7428571428571429 and parameters: {'C': 93.4803589438332, 'gamma': 'scale', 'kernel': 'rbf'}. Best is trial 1 with value: 0.7428571428571429.
[I 2025-09-05 14:05:48,701] Trial 2 finished with value: 0.5714285714285714 and parameters: {'C': 87.1840776965869, 'gamma': 'auto', 'kernel': 'rbf'}. Best is trial 1 with value: 0.7428571428571429.
[I 2025-09-05 14:05:52,908] Trial 3 finished with value: 0.5428571428571428 and parameters: {'C': 42.76058318240929, 'gamma': 'auto', 'kernel': 'linear'}. Best is trial 1 with value: 0.7428571428571429.
[I 2025-09-05 14:05:54,482] Trial 4 finished with value: 0.5428571428571428 and parameters: {'C': 19.2809526225

KeyboardInterrupt: 

In [None]:
print(study_baseline.best_params)

{'C': 10.200101229666812, 'gamma': 'auto', 'kernel': 'poly'}


In [64]:
model = SVC(C=study_baseline.best_params['C'], gamma=study_baseline.best_params['gamma'], kernel=study_baseline.best_params['kernel'], probability=True, random_state=15)
model.fit(X_train[:, :64], y_train)

0,1,2
,C,10.200101229666812
,kernel,'poly'
,degree,3
,gamma,'auto'
,coef0,0.0
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,


In [6]:
from sklearn.metrics import classification_report
print(classification_report(y_test, model.predict(X_test[:, :64])))

              precision    recall  f1-score   support

        -1.0       0.85      0.80      0.82        35
         1.0       0.94      0.96      0.95       117

    accuracy                           0.92       152
   macro avg       0.89      0.88      0.89       152
weighted avg       0.92      0.92      0.92       152



In [24]:
X_train.shape, X_test.shape

((604, 151), (152, 151))

In [None]:
np.savez()

In [65]:
def objective(trial):
    params = {
    'C': trial.suggest_float('C', 0.1, 100),
    'gamma': trial.suggest_categorical('gamma', ['scale', 'auto']),
    'kernel': trial.suggest_categorical('kernel', ['rbf', 'linear', 'poly']),
    "probability":False
    }
    model = SVC(**params, random_state=15)
    model.fit(X_train[:, :64], y_train)
    return recall_score(y_test, model.predict(X_test[:, :64]), pos_label=-1)
    # return model.score(X_test[:, :64], y_test)

study_baseline = optuna.create_study(direction="maximize")
study_baseline.optimize(objective, n_trials=100)

[I 2025-09-05 12:17:44,369] A new study created in memory with name: no-name-0f7ae522-0427-4fbf-a4a2-5eb4804bc170
[I 2025-09-05 12:17:44,390] Trial 0 finished with value: 0.7428571428571429 and parameters: {'C': 57.73983287795363, 'gamma': 'scale', 'kernel': 'rbf'}. Best is trial 0 with value: 0.7428571428571429.
[I 2025-09-05 12:17:44,403] Trial 1 finished with value: 0.7428571428571429 and parameters: {'C': 69.12397130309775, 'gamma': 'scale', 'kernel': 'rbf'}. Best is trial 0 with value: 0.7428571428571429.
[I 2025-09-05 12:17:52,791] Trial 2 finished with value: 0.5428571428571428 and parameters: {'C': 37.385687162973475, 'gamma': 'auto', 'kernel': 'linear'}. Best is trial 0 with value: 0.7428571428571429.
[I 2025-09-05 12:17:52,803] Trial 3 finished with value: 0.6571428571428571 and parameters: {'C': 65.0740026537046, 'gamma': 'scale', 'kernel': 'poly'}. Best is trial 0 with value: 0.7428571428571429.
[I 2025-09-05 12:17:52,821] Trial 4 finished with value: 0.5714285714285714 and

In [66]:
model = SVC(kernel=qkernel, random_state=15)


In [67]:
model.fit(X_train[:, :64], y_train)

Row 1/604...


ValueError: Features must be of length 8 or less; got length 64.

In [None]:
predicted = model.predict(X_test[:, :64])

Row 1/152...
  Finished row 1
Row 2/152...
  Finished row 2
Row 3/152...
  Finished row 3
Row 4/152...
  Finished row 4
Row 5/152...
  Finished row 5
Row 6/152...
  Finished row 6
Row 7/152...
  Finished row 7
Row 8/152...
  Finished row 8
Row 9/152...
  Finished row 9
Row 10/152...
  Finished row 10
Row 11/152...
  Finished row 11
Row 12/152...
  Finished row 12
Row 13/152...
  Finished row 13
Row 14/152...
  Finished row 14
Row 15/152...
  Finished row 15
Row 16/152...
  Finished row 16
Row 17/152...
  Finished row 17
Row 18/152...
  Finished row 18
Row 19/152...
  Finished row 19
Row 20/152...
  Finished row 20
Row 21/152...
  Finished row 21
Row 22/152...
  Finished row 22
Row 23/152...
  Finished row 23
Row 24/152...
  Finished row 24
Row 25/152...
  Finished row 25
Row 26/152...
  Finished row 26
Row 27/152...
  Finished row 27
Row 28/152...
  Finished row 28
Row 29/152...
  Finished row 29
Row 30/152...
  Finished row 30
Row 31/152...
  Finished row 31
Row 32/152...
  Finished r

In [None]:
print(classification_report(y_test, predicted))

              precision    recall  f1-score   support

        -1.0       0.34      0.66      0.45        35
         1.0       0.86      0.62      0.72       117

    accuracy                           0.63       152
   macro avg       0.60      0.64      0.59       152
weighted avg       0.74      0.63      0.66       152



In [180]:
svq_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC(kernel=qkernel, class_weight='balanced'))
])

In [None]:
svq_pipe.fit(X_train_subset, y_train_subset)

Row 1/604...
  Finished row 1
Row 2/604...
  Finished row 2
Row 3/604...
  Finished row 3
Row 4/604...
  Finished row 4
Row 5/604...
  Finished row 5
Row 6/604...
  Finished row 6
Row 7/604...
  Finished row 7
Row 8/604...
  Finished row 8
Row 9/604...
  Finished row 9
Row 10/604...
  Finished row 10
Row 11/604...
  Finished row 11
Row 12/604...
  Finished row 12
Row 13/604...
  Finished row 13
Row 14/604...
  Finished row 14
Row 15/604...
  Finished row 15
Row 16/604...
  Finished row 16
Row 17/604...
  Finished row 17
Row 18/604...
  Finished row 18
Row 19/604...
  Finished row 19
Row 20/604...
  Finished row 20
Row 21/604...
  Finished row 21
Row 22/604...
  Finished row 22
Row 23/604...
  Finished row 23
Row 24/604...
  Finished row 24
Row 25/604...
  Finished row 25
Row 26/604...
  Finished row 26
Row 27/604...
  Finished row 27
Row 28/604...
  Finished row 28
Row 29/604...
  Finished row 29
Row 30/604...
  Finished row 30
Row 31/604...
  Finished row 31
Row 32/604...
  Finished r