# Quantum Kernel Machine Learning

To implement our use case, few things need to be considered. As usual, we will import our data, make some preprocessing such as check for missing values, split the data into training and testing datasets, rescale the data using normalization. An important step is feature extraction such as principal component analysis. We need to reduce our data from 43 features to 2 features. We could also use feature selection for dimension reduction. The encoding function is specified in a data_map function (for example data_map_12). 

In [10]:
# Loading your IBM Quantum account: https://quantum-computing.ibm.com
# It requires us to sign with an IBMQ account.
#from qiskit import IBMQ
#IBMQ.save_account('Y O U R   A P I')

# Import utilities
#import matplotlib.pyplot as plt
#import numpy as np
#import pandas as pd
#from functools import reduce

# sklearn imports
#from sklearn import preprocessing
#from sklearn.preprocessing import LabelEncoder
#from sklearn.model_selection import train_test_split
#from sklearn.svm import SVC

# Importing standard Qiskit libraries and Qiskit Machine Learning imports
#from qiskit import BasicAer
#from qiskit.utils import QuantumInstance, algorithm_globals
#from qiskit_machine_learning.algorithms import QSVC
#from qiskit_machine_learning.kernels import QuantumKernel
#from qiskit_machine_learning.datasets import ad_hoc_data
#from qiskit.algorithms.optimizers import COBYLA
#from qiskit.circuit.library import ZZFeatureMap, PauliFeatureMap

# Importing utilities for data processing and visualization
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Importing scikit-learn utilities for classification and evaluation
#from sklearn import preprocessing
#from sklearn.preprocessing import LabelEncoder
#from sklearn.model_selection import train_test_split
#from sklearn.svm import SVC
#from sklearn import metrics
#from sklearn.model_selection import cross_val_score
#from sklearn.metrics import classification_report

# Importing qiskit_machine_learning utilities for classification and evaluation
#from qiskit_machine_learning.algorithms import QSVC, PegasosQSVC
#from qiskit_machine_learning.kernels.algorithms import QuantumKernelTrainer

We can choose to compute code with local simulator (Aer simulators) or with online quantum simulators and of course real quantum hardware (Comment / Uncomment the lines below)

In [11]:
# Compute code with local simulator (Aer simulators)
from qiskit.primitives import Sampler
sampler = Sampler()


## Compute code with online quantum simulators or quantum hardware from the cloud
## Import QiskitRuntimeService and Sampler
#from qiskit_ibm_runtime import QiskitRuntimeService, Sampler
## Define service
#service = QiskitRuntimeService(channel = 'ibm_quantum', token = ibm_account, instance = 'ibm-q/open/main')
## Get backend
#quantum_backend = "ibmq_bogota"
#backend = service.backend(quantum_backend) # Use a simulator or hardware from the cloud
## Define Sampler with diferent options
## resilience_level=1 adds readout error mitigation
## execution.shots is the number of shots
## optimization_level=3 adds dynamical decoupling
#from qiskit_ibm_runtime import Options
#options = Options()
#options.resilience_level = 1
#options.execution.shots = 1024
#options.optimization_level = 3
#sampler = Sampler(session=backend, options = options)

In [12]:
# seed for randomization, to keep outputs consistent
from qiskit.utils import algorithm_globals
seed = 123456
algorithm_globals.random_seed = seed

In [13]:
# Encoding Functions
from functools import reduce

def data_map_8(x: np.ndarray) -> float:
    coeff = x[0] if len(x) == 1 else reduce(lambda m, n: np.pi*(m * n), x)
    return coeff

def data_map_9(x: np.ndarray) -> float:
    coeff = x[0] if len(x) == 1 else reduce(lambda m, n: (np.pi/2)*(m * n), 1 - x)
    return coeff

def data_map_10(x: np.ndarray) -> float:
    coeff = x[0] if len(x) == 1 else reduce(lambda m, n: np.pi*np.exp(((n - m)*(n - m))/8), x)
    return coeff

def data_map_11(x: np.ndarray) -> float:
    coeff = x[0] if len(x) == 1 else reduce(lambda m, n: (np.pi/3)*(m * n), 1/(np.cos(x)))
    return coeff

def data_map_12(x: np.ndarray) -> float:
    coeff = x[0] if len(x) == 1 else reduce(lambda m, n: np.pi*(m * n), np.cos(x))
    return coeff

In [14]:
# Quantum Feature Mapping with feature_dimension = 2 and reps = 2
from qiskit.circuit.library import PauliFeatureMap

qfm_default = PauliFeatureMap(feature_dimension=2,
                                    paulis = ['ZI','IZ','ZZ'],
                                 reps=2, entanglement='full')
print(qfm_default)
qfm_8 = PauliFeatureMap(feature_dimension=2,
                                    paulis = ['ZI','IZ','ZZ'],
                                 reps=2, entanglement='full', data_map_func=data_map_8)
print(qfm_8)
qfm_9 = PauliFeatureMap(feature_dimension=2,
                                    paulis = ['ZI','IZ','ZZ'],
                                 reps=2, entanglement='full', data_map_func=data_map_9)
print(qfm_9)
qfm_10 = PauliFeatureMap(feature_dimension=2,
                                    paulis = ['ZI','IZ','ZZ'],
                                 reps=2, entanglement='full', data_map_func=data_map_10)
print(qfm_10)
qfm_11 = PauliFeatureMap(feature_dimension=2,
                                    paulis = ['ZI','IZ','ZZ'],
                                 reps=2, entanglement='full', data_map_func=data_map_11)
print(qfm_11)
qfm_12 = PauliFeatureMap(feature_dimension=2,
                                    paulis = ['ZI','IZ','ZZ'],
                                 reps=2, entanglement='full', data_map_func=data_map_12)
print(qfm_12)

     ┌─────────────────────────────┐
q_0: ┤0                            ├
     │  PauliFeatureMap(x[0],x[1]) │
q_1: ┤1                            ├
     └─────────────────────────────┘
     ┌─────────────────────────────┐
q_0: ┤0                            ├
     │  PauliFeatureMap(x[0],x[1]) │
q_1: ┤1                            ├
     └─────────────────────────────┘
     ┌─────────────────────────────┐
q_0: ┤0                            ├
     │  PauliFeatureMap(x[0],x[1]) │
q_1: ┤1                            ├
     └─────────────────────────────┘
     ┌─────────────────────────────┐
q_0: ┤0                            ├
     │  PauliFeatureMap(x[0],x[1]) │
q_1: ┤1                            ├
     └─────────────────────────────┘
     ┌─────────────────────────────┐
q_0: ┤0                            ├
     │  PauliFeatureMap(x[0],x[1]) │
q_1: ┤1                            ├
     └─────────────────────────────┘
     ┌─────────────────────────────┐
q_0: ┤0                            ├
 

We utilize the default implementation of the Sampler primitive and the ComputeUncompute fidelity, which calculates the overlaps between states.If you do not provide specific instances of Sampler or Fidelity, the code will automatically create these objects with the default values.

In [15]:
from qiskit.algorithms.state_fidelities import ComputeUncompute
from qiskit_machine_learning.kernels import FidelityQuantumKernel
fidelity = ComputeUncompute(sampler=sampler)
Q_Kernel_8 = FidelityQuantumKernel(fidelity=fidelity, feature_map=qfm_8)
Q_Kernel_9 = FidelityQuantumKernel(fidelity=fidelity, feature_map=qfm_9)
Q_Kernel_10 = FidelityQuantumKernel(fidelity=fidelity, feature_map=qfm_10)
Q_Kernel_11 = FidelityQuantumKernel(fidelity=fidelity, feature_map=qfm_11)
Q_Kernel_12 = FidelityQuantumKernel(fidelity=fidelity, feature_map=qfm_12)
Q_Kernel_default = FidelityQuantumKernel(fidelity=fidelity, feature_map=qfm_default)

We process our data as we did for classical computing (load data, missing data, split of the data, normalization, PCA)

In [16]:
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Import dataset
data = '../data/datasets/neurons_binary.csv'
neuron = pd.read_csv(data, delimiter=';')

df = neuron.head(22).copy()                        # Principal
df = pd.concat([df, neuron.iloc[17034:17056]])     # Interneuron

# Drop row having at least 1 missing value
df = df.dropna()

# Creating an instance of Labelencoder
enc = LabelEncoder()
# Assigning numerical value and storing it
df[["Target"]] = df[["Target"]].apply(enc.fit_transform)

# Divide the data, y the variable to predict (Target) and X the features
X = df[df.columns[1:]]
y = df['Target']

# Splitting the data : training and test (20%)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

# Scaling the data
Normalize = preprocessing.StandardScaler()
# Transform data
X_train = Normalize.fit_transform(X_train)
X_train = pd.DataFrame(X_train, columns = X.columns)
X_test = Normalize.fit_transform(X_test)
X_test = pd.DataFrame(X_test, columns = X.columns)

# Dimension Reduction with PCA (with two principal compoenents)
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
# transform data
X_train = pca.fit_transform(X_train)
X_test = pca.fit_transform(X_test)
# Define a new DataFrame with two column (the principal components)
component_columns = []
for x in (n+1 for n in range(2)):
    component_columns = component_columns + ['PCA_%i'%x]
X_train = pd.DataFrame(data = X_train, columns = component_columns)
X_test = pd.DataFrame(data = X_test, columns = component_columns)

print(X_train)
print(X_test)

        PCA_1     PCA_2
0   -2.246856  1.001576
1   10.456852 -0.864867
2   -6.478235 -0.469133
3   -1.196812 -1.323666
4   -6.189865 -1.503502
5   -6.064777 -1.038960
6    2.588510 -2.702076
7   -3.618969  2.981614
8    0.310268 -2.836980
9   -3.018028  0.251393
10  -1.423122  1.351361
11   3.969591  0.555156
12  -0.698979 -0.667258
13   5.008537  2.118542
14  -0.547348  1.042548
15  -6.022106 -1.572483
16   2.714648  8.179320
17   1.170691 -2.017913
18  -5.431129 -0.880815
19   1.283781 -1.080195
20  -4.300506 -0.645016
21  -2.486374 -0.284906
22   5.892424  0.779772
23  -4.295756  0.346126
24  -2.788814  3.233115
25  -6.570051 -1.796226
26   7.748237 -1.149488
27  -5.325899 -0.909251
28   2.388197  5.516065
29   6.763665 -2.122499
30  -1.684444  1.497672
31  11.773240  0.039079
32   2.614585 -3.518442
33   9.197619 -2.454461
34  -3.492777  0.944796
       PCA_1     PCA_2
0  -2.828170 -2.752159
1   1.331436 -1.650711
2  11.925533 -1.371046
3   2.394277 -0.203664
4  -6.883203  0.03044

Then, we iterate the different classifiers.

In [17]:
from qiskit_machine_learning.algorithms import QSVC

names = ["Q_Kernel_default", "Q_Kernel_8", "Q_Kernel_9",
         "Q_Kernel_10", "Q_Kernel_11", "Q_Kernel_12"]

classifiers = [
    QSVC(quantum_kernel=Q_Kernel_default),
    QSVC(quantum_kernel=Q_Kernel_8),
    QSVC(quantum_kernel=Q_Kernel_9),
    QSVC(quantum_kernel=Q_Kernel_10),
    QSVC(quantum_kernel=Q_Kernel_11),
    QSVC(quantum_kernel=Q_Kernel_12),
              ]

#model = QSVC(quantum_kernel=Q_Kernel_8)

#classifiers = [
#    SVC(kernel=Q_Kernel_default.evaluate),
#    SVC(kernel=Q_Kernel_8.evaluate),
#    SVC(kernel=Q_Kernel_9.evaluate),
#    SVC(kernel=Q_Kernel_10.evaluate),
#    SVC(kernel=Q_Kernel_11.evaluate),
#    SVC(kernel=Q_Kernel_12.evaluate),
#              ]


In [18]:
# Iterate over classifiers and give classification test score
for name, clf in zip(names, classifiers):
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        print(f'Callable kernel classification test score for {name}: {score}')

Callable kernel classification test score for Q_Kernel_default: 0.3333333333333333
Callable kernel classification test score for Q_Kernel_8: 0.4444444444444444
Callable kernel classification test score for Q_Kernel_9: 0.4444444444444444
Callable kernel classification test score for Q_Kernel_10: 0.6666666666666666
Callable kernel classification test score for Q_Kernel_11: 0.6666666666666666
Callable kernel classification test score for Q_Kernel_12: 0.6666666666666666


As we also did previously in classical computing, we can provide metrics about our model (Accuracy, Precision, Recall, F1 score, cross validation or use the classification report).

In [19]:
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report

# Provide metrics over classifiers
for name, clf in zip(names, classifiers):
        print("\n")
        print(name)
        print("\n")
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
    
        print("\n")
        print("Print predicted data coming from X_test as new input data")
        print(y_pred)
        print("\n")
        print("Print real values\n")
        print(y_test)
        print("\n")
    
        print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
        print("Precision:", metrics.precision_score(y_test, y_pred, average='micro'))
        print("Recall:", metrics.recall_score(y_test, y_pred, average='micro'))
        print("f1 Score:", metrics.f1_score(y_test, y_pred, average='micro'))
        print("Cross Validation Mean:", cross_val_score(clf, X_train, y_train, cv=5).mean())
        print("Cross Validation Std:", cross_val_score(clf, X_train, y_train, cv=5).std())

print('Classification Report: \n')
print(classification_report(y_test,y_pred))



Q_Kernel_default




Print predicted data coming from X_test as new input data
[1 1 0 1 0 1 1 1 0]


Print real values

16       1
17038    0
17052    0
17040    0
17       1
17046    0
9        1
17049    0
21       1
Name: Target, dtype: int64


Accuracy: 0.3333333333333333
Precision: 0.3333333333333333
Recall: 0.3333333333333333
f1 Score: 0.3333333333333333
Cross Validation Mean: 0.5142857142857142
Cross Validation Std: 0.06998542122237651


Q_Kernel_8




Print predicted data coming from X_test as new input data
[1 1 1 0 1 1 0 0 0]


Print real values

16       1
17038    0
17052    0
17040    0
17       1
17046    0
9        1
17049    0
21       1
Name: Target, dtype: int64


Accuracy: 0.4444444444444444
Precision: 0.4444444444444444
Recall: 0.4444444444444444
f1 Score: 0.4444444444444444
Cross Validation Mean: 0.7142857142857142
Cross Validation Std: 0.18070158058105024


Q_Kernel_9




Print predicted data coming from X_test as new input data
[0 1 1 0 0 0 0 0 1]


Print real 

## q_kernel_zz 

Another coding example with q_kernel_zz is described below. We will use the ZZFeatureMap with linear entanglement, we will repeat the data encoding step two times, and we will use feature selection (embedded decision tree) to select five features (on a very small dataset of 260 neurons). We will use five qubits on the StatevectorSimulator from the IBM Quantum framework (https://qiskit.org/). The simulator models the noiseless execution of quantum computer hardware, representing the ideal. It evaluates the resulting quantum state vector. For each experiment, to assess whether patterns are identifiable, we will train the supervised classification algorithms using 80% of each sample, randomly chosen, and we will assess the accuracy in predicting the remaining 20%. The accuracy of the classification will be assessed by performing cross-validation on the training dataset. We will average the 5-fold cross-validation scores resulting in a mean +/- standard deviation score. For data rescaling, we will use QuantileTransformer. We also will transform the dataset using a Mahalanobis transformation with suppression of a neuron when the surface of the soma equals 0. The Mahalanobis distance is a multivariate metric measuring the distance between a point and a distribution. Applying the Mahalonobis distance allows reduction of the standard deviation for each feature by deleting neurons from the dataset. The datasets will be preprocessed to address missing values. If a value within the features is missing, the neuron will be deleted from the dataset. Categorical features such as morphology types will be encoded, transforming each categorical feature with m possible values.

In [21]:
# Loading your IBM Quantum account(s)
#from qiskit import IBMQ
#from qiskit.providers.ibmq import least_busy

# Import utilities
import numpy as np
import pandas as pd
    
#from sklearn import preprocessing
#from sklearn.svm import SVC
#from sklearn import metrics
#from sklearn.model_selection import cross_val_score
#from sklearn.metrics import classification_report

# Importing standard Qiskit libraries and Qiskit Machine Learning imports
#from qiskit import Aer, QuantumCircuit, BasicAer
#from qiskit.circuit.library import ZZFeatureMap
#from qiskit_machine_learning.algorithms import QSVC
#from qiskit import QuantumCircuit
#from qiskit.circuit import ParameterVector
#from qiskit.visualization import circuit_drawer

#from typing import Union
#from qiskit_machine_learning.exceptions import QiskitMachineLearningError

# seed for randomization, to keep outputs consistent
from qiskit.utils import algorithm_globals
seed = 123456
algorithm_globals.random_seed = seed

# Define parameters
cv = 5 # 5-fold cross-validation 
feature_dimension = 5 # Features dimension
k_features = 5 # Feature selection
reps = 2 # Repetition
ibm_account = 'YOUR API'
quantum_backend = 'simulator_statevector'

# Import dataset
data = '../data/datasets/neurons_maha_soma.csv'
neuron = pd.read_csv(data, delimiter=',')

print(neuron)

df = neuron.head(22).copy()                    # Ganglion
df = pd.concat([df, neuron.iloc[320:340]])     # Granule
df = pd.concat([df, neuron.iloc[1493:1513]])   # Medium Spiny
df = pd.concat([df, neuron.iloc[1171:1191]])   # Parachromaffin
df = pd.concat([df, neuron.iloc[10031:10051]])   # Pyramidal

df = pd.concat([df, neuron.iloc[2705:2725]]) # Basket
df = pd.concat([df, neuron.iloc[22589:22609]]) # Bitufted
df = pd.concat([df, neuron.iloc[3175:3195]]) # Chandelier
df = pd.concat([df, neuron.iloc[22644:22664]]) # Double bouquet
df = pd.concat([df, neuron.iloc[3199:3219]]) # Martinotti
df = pd.concat([df, neuron.iloc[8260:8280]]) # Nitrergic

df = pd.concat([df, neuron.iloc[2255:2275]]) # Astrocytes
df = pd.concat([df, neuron.iloc[3306:3326]]) # Microglia


# Drop row having at least 1 missing value
df = df.dropna()

# Creating an instance of Labelencoder
from sklearn.preprocessing import LabelEncoder
enc = LabelEncoder()
# Assigning numerical value and storing it
df[["Target"]] = df[["Target"]].apply(enc.fit_transform)

from sklearn.model_selection import train_test_split
# We split our data to y (Target) and X (features)
y = df.loc[:, df.columns == 'Target']
# Features variables
X = df.loc[:, df.columns != ('Target')]
# Split data into train and test
# Option test_size = 0.2 means that we take 20% of the data for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling the data
from sklearn.preprocessing import QuantileTransformer
Normalize = QuantileTransformer(n_quantiles=1000, output_distribution="uniform")
# Transform data
X_train = Normalize.fit_transform(X_train)
X_train = pd.DataFrame(X_train, columns = X.columns)
X_test = Normalize.fit_transform(X_test)
X_test = pd.DataFrame(X_test, columns = X.columns)

from sklearn.tree import DecisionTreeClassifier

def embedded_decision_tree_classifier(X, y, k_features, output_folder=None):
    '''
    Here we use decision tree classifier to select features. We select the k best features (k_features)
    
    Inputs:
        - X (features) DataFrame
        - y (target) DataFrame
    
    '''
    
    print("\n")
    print("Decision Tree Regressor Features Importance: started")
    print("\n")

    # define the model
    model = DecisionTreeClassifier()
    # fit the model
    model.fit(X, y)
    # get importance
    importance = model.feature_importances_
    # Get features name
    feature_names = [f"{i}" for i in X.columns]

    # create a data frame to visualize features importance
    features_importance = pd.DataFrame({"Features": feature_names, "Importances":importance})
    features_importance.set_index('Importances')

    # Print features importance
    print("\n")
    print("Features Importances:")
    print("\n")
    print(features_importance)
    if output_folder is not None:
        features_importance.to_csv(output_folder+'Decision_Tree_Classifier_Features_Importance.csv', index=False)

    if output_folder is not None:
        # plot feature importance
        features_importance.plot(kind='bar',x='Features',y='Importances')
        pyplot.title('Decision Tree Classifier Features Importance')
        pyplot.tight_layout()
        pyplot.savefig(output_folder+'Decision_Tree_Classfier_Features_Importance.png')
    
    # Select the k most important features
    features_columns = []
    # Order the features importance dataframe
    df = pd.DataFrame(data = features_importance.sort_values(by='Importances', key=abs,ascending=False))
    # Put the k most important features in features_columns
    for x in range(k_features):
        features_columns = features_columns + [df.iloc[x][0]]

    # Create a new DataFrame with selected features
    df_data = pd.DataFrame(data = X, columns = features_columns)
    
    print("\n")
    print("Decision Tree Classifier Features Importance: DataFrame")
    print("\n")
    print(df_data)
    
    return df_data

X_train = embedded_decision_tree_classifier(X_train, y_train, k_features)
X_test = pd.DataFrame(data = X_test, columns = X_train.columns)

feature_dimension = X_train.shape[1] # Number of features
multiclass = None
output_folder = None
   
# We convert pandas DataFrame into numpy array
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()

y_train = y_train.values.ravel()
y_test = y_test.values.ravel()

# seed for randomization, to keep outputs consistent
seed = 123456
algorithm_globals.random_seed = seed

# Quantum Feature Mapping with feature_dimension = 5 and reps = 2
from qiskit.circuit.library import ZZFeatureMap
qfm_zz = ZZFeatureMap(feature_dimension=feature_dimension, reps=reps, entanglement="linear")
            
print(qfm_zz)

if quantum_backend is not None:
    # Compute code with online quantum simulators or quantum hardware from the cloud
    # Import QiskitRuntimeService and Sampler
    from qiskit_ibm_runtime import QiskitRuntimeService, Sampler
    # Define service
    service = QiskitRuntimeService(channel = 'ibm_quantum', token = ibm_account, instance = 'ibm-q/open/main')
    # Get backend
    backend = service.backend(quantum_backend) # Use a simulator or hardware from the cloud
    # Define Sampler with diferent options
    # resilience_level=1 adds readout error mitigation
    # execution.shots is the number of shots
    # optimization_level=3 adds dynamical decoupling
    from qiskit_ibm_runtime import Options
    options = Options()
    options.resilience_level = 1
    options.execution.shots = 1024
    options.optimization_level = 3
    sampler = Sampler(session=backend, options = options)
else:
    # Compute code with local simulator (Aer simulators)
    from qiskit.primitives import Sampler
    sampler = Sampler()
    
# After preparing our training and testing datasets, we configure the FidelityQuantumKernel class to compute a kernel matrix using the ZZFeatureMap.
# We utilize the default implementation of the Sampler primitive and the ComputeUncompute fidelity, which calculates the overlaps between states.
# If you do not provide specific instances of Sampler or Fidelity, the code will automatically create these objects with the default values.
from qiskit.algorithms.state_fidelities import ComputeUncompute
from qiskit_machine_learning.kernels import FidelityQuantumKernel
fidelity = ComputeUncompute(sampler=sampler)
Q_Kernel_zz = FidelityQuantumKernel(fidelity=fidelity, feature_map=qfm_zz)


# QSVC model
model = QSVC(quantum_kernel=Q_Kernel_zz)
model.fit(X_train,y_train)
score = model.score(X_test, y_test)
print(f'Callable kernel classification test score for q_kernel_zz: {score}')
          
y_pred = model.predict(X_test)
        
print("\n")
print("Print predicted data coming from X_test as new input data")
print(y_pred)
print("\n")
print("Print real values\n")
print(y_test)
print("\n")
    
# K-Fold Cross Validation
from sklearn.model_selection import KFold
k_fold = KFold(n_splits=cv)
score = np.zeros(cv)
i = 0
print(score)
for indices_train, indices_test in k_fold.split(X_train):
    #print(indices_train, indices_test)
    X_train_ = X_train[indices_train]
    X_test_ = X_train[indices_test]
    y_train_ = y_train[indices_train]
    y_test_ = y_train[indices_test]
 
    # fit classifier to data
    model.fit(X_train_, y_train_)

    # score classifier
    score[i] = model.score(X_test_, y_test_)
    i = i + 1

import math
print("cross validation scores: ", score)
cross_mean = sum(score) / len(score)
cross_var = sum(pow(x - cross_mean,2) for x in score) / len(score)  # variance
cross_std  = math.sqrt(cross_var)  # standard deviation
print("cross validation mean: ", cross_mean)
    
results = [metrics.accuracy_score(y_test, y_pred),metrics.precision_score(y_test, y_pred, average='micro'),metrics.recall_score(y_test, y_pred, average='micro'),metrics.f1_score(y_test, y_pred, average='micro'), cross_mean, cross_std]
    
metrics_dataframe = pd.DataFrame(results, index=["Accuracy", "Precision", "Recall", "F1 Score", "Cross-validation mean", "Cross-validation std"], columns=['q_kernel_zz'])

print('Classification Report: \n')
print(classification_report(y_test,y_pred))
        
print(metrics_dataframe)

               Target  Soma_Surface  N_stems  N_bifs  N_branch  N_tips  \
0            ganglion      1149.320      4.0   101.0     206.0   106.0   
1            ganglion      1511.830      3.0    70.0     143.0    74.0   
2            ganglion      1831.530      3.0    13.0      29.0    17.0   
3            ganglion      1291.270      6.0   109.0     224.0   116.0   
4            ganglion      3064.340      4.0    60.0     124.0    65.0   
...               ...           ...      ...     ...       ...     ...   
22686  double_bouquet       605.067      5.0   132.0     269.0   138.0   
22687  double_bouquet       920.949      6.0   121.0     248.0   128.0   
22688  double_bouquet       770.529      3.0   104.0     211.0   108.0   
22689  double_bouquet       478.078      4.0   158.0     320.0   163.0   
22690  double_bouquet       629.470      4.0    65.0     134.0    70.0   

        Width   Height   Depth     Type  ...  Bif_ampl_remote  Bif_tilt_local  \
0      249.09   493.80   33.63



Callable kernel classification test score for q_kernel_zz: 0.5471698113207547


KeyboardInterrupt: 

# Pegasos Quantum Support Vector Classifier: Binary Classification

There is also an alternative method to QSVC (which use the dual optimization from scikit-learn) using the Pegasos algorithm from Shalev-Shwartz where another SVM based algorithm benefits from the quantum kernel method. PegasosQSVC yields a training complexity that is independent of the size of the training set. This means that it could train faster than QSVC with large training sets. 

In [None]:
# Loading your IBM Quantum account(s)
#from qiskit import IBMQ
#IBMQ.save_account('YOUR API')

# Import utilities
#import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#from functools import reduce

# sklearn imports
#from sklearn import preprocessing
#from sklearn.preprocessing import LabelEncoder
#from sklearn.model_selection import train_test_split
#from sklearn.svm import SVC

# Importing standard Qiskit libraries and Qiskit Machine Learning imports
#from qiskit import BasicAer
#from qiskit.circuit.library import ZZFeatureMap, PauliFeatureMap
#from qiskit.utils import QuantumInstance, algorithm_globals
#from qiskit_machine_learning.algorithms import QSVC
#from qiskit_machine_learning.algorithms import PegasosQSVC
#from qiskit_machine_learning.kernels import QuantumKernel
#from qiskit_machine_learning.datasets import ad_hoc_data
#from qiskit.algorithms.optimizers import COBYLA

# Backend objects can also be set up using the IBMQ package.
# The use of these requires us to sign with an IBMQ account.
# Assuming the credentials are already loaded onto your computer, you sign in with
#IBMQ.load_account()
#provider = IBMQ.get_provider(hub='ibm-q')
#backend = provider.get_backend('ibmq_lima')

# What additional backends we have available.
#for backend in provider.backends():
#    print(backend)
    
# seed for randomization, to keep outputs consistent
from qiskit.utils import algorithm_globals
seed = 123456
algorithm_globals.random_seed = seed

# number of qubits is equal to the number of features
#num_qubits = 2

# number of steps performed during the training procedure
tau = 100

# regularization parameter
C = 1000

# Encoding Functions
from functools import reduce

def data_map_8(x: np.ndarray) -> float:
    coeff = x[0] if len(x) == 1 else reduce(lambda m, n: np.pi*(m * n), x)
    return coeff

def data_map_9(x: np.ndarray) -> float:
    coeff = x[0] if len(x) == 1 else reduce(lambda m, n: (np.pi/2)*(m * n), 1 - x)
    return coeff

def data_map_10(x: np.ndarray) -> float:
    coeff = x[0] if len(x) == 1 else reduce(lambda m, n: np.pi*np.exp(((n - m)*(n - m))/8), x)
    return coeff

def data_map_11(x: np.ndarray) -> float:
    coeff = x[0] if len(x) == 1 else reduce(lambda m, n: (np.pi/3)*(m * n), 1/(np.cos(x)))
    return coeff

def data_map_12(x: np.ndarray) -> float:
    coeff = x[0] if len(x) == 1 else reduce(lambda m, n: np.pi*(m * n), np.cos(x))
    return coeff
    
# Quantum Feature Mapping with feature_dimension = 2 and reps = 2
from qiskit.circuit.library import PauliFeatureMap

qfm_default = PauliFeatureMap(feature_dimension=2,
                                    paulis = ['ZI','IZ','ZZ'],
                                 reps=2, entanglement='full')
print(qfm_default)
qfm_8 = PauliFeatureMap(feature_dimension=2,
                                    paulis = ['ZI','IZ','ZZ'],
                                 reps=2, entanglement='full', data_map_func=data_map_8)
print(qfm_8)
qfm_9 = PauliFeatureMap(feature_dimension=2,
                                    paulis = ['ZI','IZ','ZZ'],
                                 reps=2, entanglement='full', data_map_func=data_map_9)
print(qfm_9)
qfm_10 = PauliFeatureMap(feature_dimension=2,
                                    paulis = ['ZI','IZ','ZZ'],
                                 reps=2, entanglement='full', data_map_func=data_map_10)
print(qfm_10)
qfm_11 = PauliFeatureMap(feature_dimension=2,
                                    paulis = ['ZI','IZ','ZZ'],
                                 reps=2, entanglement='full', data_map_func=data_map_11)
print(qfm_11)
qfm_12 = PauliFeatureMap(feature_dimension=2,
                                    paulis = ['ZI','IZ','ZZ'],
                                 reps=2, entanglement='full', data_map_func=data_map_12)
print(qfm_12)
                                 
print(qfm_8.draw())

# For quantum access, the following lines must be adapted
if quantum_backend is not None:
    # Compute code with online quantum simulators or quantum hardware from the cloud
    # Import QiskitRuntimeService and Sampler
    from qiskit_ibm_runtime import QiskitRuntimeService, Sampler
    # Define service
    service = QiskitRuntimeService(channel = 'ibm_quantum', token = ibm_account, instance = 'ibm-q/open/main')
    # Get backend
    backend = service.backend(quantum_backend) # Use a simulator or hardware from the cloud
    # Define Sampler: We use the reference implementation of the Sampler primitive and the ComputeUncompute fidelity that computes overlaps between states. These are the default values and if you don't pass a Sampler or Fidelity instance, the same objects will be created automatically for you.
    # Run Quasi-Probability calculation
    # optimization_level=3 adds dynamical decoupling
    # resilience_level=1 adds readout error mitigation
    from qiskit_ibm_runtime import Options
    options = Options()
    options.resilience_level = 1
    options.execution.shots = 1024
    options.optimization_level = 3
    sampler = Sampler(session=backend, options = options)
else:
    # Compute code with local simulator (Aer simulators)
    from qiskit.primitives import Sampler
    sampler = Sampler()
    
from qiskit.algorithms.state_fidelities import ComputeUncompute
from qiskit_machine_learning.kernels import FidelityQuantumKernel
fidelity = ComputeUncompute(sampler=sampler)
Q_Kernel_default = FidelityQuantumKernel(fidelity=fidelity, feature_map=qfm_default)
Q_Kernel_8 = FidelityQuantumKernel(fidelity=fidelity, feature_map=qfm_8)
Q_Kernel_9 = FidelityQuantumKernel(fidelity=fidelity, feature_map=qfm_9)
Q_Kernel_10 = FidelityQuantumKernel(fidelity=fidelity, feature_map=qfm_10)
Q_Kernel_11 = FidelityQuantumKernel(fidelity=fidelity, feature_map=qfm_11)
Q_Kernel_12 = FidelityQuantumKernel(fidelity=fidelity, feature_map=qfm_12)

names = ["Q_Kernel_default", "Q_Kernel_8", "Q_Kernel_9",
         "Q_Kernel_10", "Q_Kernel_11", "Q_Kernel_12"]

classifiers = [
    PegasosQSVC(quantum_kernel=Q_Kernel_default, C=C, num_steps=tau),
    PegasosQSVC(quantum_kernel=Q_Kernel_8, C=C, num_steps=tau),
    PegasosQSVC(quantum_kernel=Q_Kernel_9, C=C, num_steps=tau),
    PegasosQSVC(quantum_kernel=Q_Kernel_10, C=C, num_steps=tau),
    PegasosQSVC(quantum_kernel=Q_Kernel_11, C=C, num_steps=tau),
    PegasosQSVC(quantum_kernel=Q_Kernel_12, C=C, num_steps=tau),
              ]

# Import dataset
data = '../data/datasets/neurons_binary.csv'
neuron = pd.read_csv(data, delimiter=';')

df = neuron.head(22).copy()                        # Principal
df = pd.concat([df, neuron.iloc[17034:17056]])     # Interneuron

# Drop row having at least 1 missing value
df = df.dropna()

# Creating an instance of Labelencoder
enc = LabelEncoder()
# Assigning numerical value and storing it
df[["Target"]] = df[["Target"]].apply(enc.fit_transform)

# Divide the data, y the variable to predict (Target) and X the features
X = df[df.columns[1:]]
y = df['Target']

# Splitting the data : training and test (20%)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

# Scaling the data
Normalize = preprocessing.StandardScaler()
# Transform data
X_train = Normalize.fit_transform(X_train)
X_train = pd.DataFrame(X_train, columns = X.columns)
X_test = Normalize.fit_transform(X_test)
X_test = pd.DataFrame(X_test, columns = X.columns)

# Dimension Reduction with PCA (with two principal compoenents)
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
# transform data
X_train = pca.fit_transform(X_train)
X_test = pca.fit_transform(X_test)
# Define a new DataFrame with two column (the principal components)
component_columns = []
for x in (n+1 for n in range(2)):
    component_columns = component_columns + ['PCA_%i'%x]
X_train = pd.DataFrame(data = X_train, columns = component_columns)
X_test = pd.DataFrame(data = X_test, columns = component_columns)

print(X_train)
print(X_test)

# iterate over classifiers
for name, clf in zip(names, classifiers):
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        print(f'Callable kernel classification test score for {name}: {score}')
        
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report

# Provide metrics over classifiers
for name, clf in zip(names, classifiers):
        print("\n")
        print(name)
        print("\n")
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
    
        print("\n")
        print("Print predicted data coming from X_test as new input data")
        print(y_pred)
        print("\n")
        print("Print real values\n")
        print(y_test)
        print("\n")
    
        print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
        print("Precision:", metrics.precision_score(y_test, y_pred, average='micro'))
        print("Recall:", metrics.recall_score(y_test, y_pred, average='micro'))
        print("f1 Score:", metrics.f1_score(y_test, y_pred, average='micro'))
        print("Cross Validation Mean:", cross_val_score(clf, X_train, y_train, cv=5).mean())
        print("Cross Validation Std:", cross_val_score(clf, X_train, y_train, cv=5).std())
        
        
        results = [metrics.accuracy_score(y_test, y_pred),metrics.precision_score(y_test, y_pred, average='micro'),metrics.recall_score(y_test, y_pred, average='micro'),metrics.f1_score(y_test, y_pred, average='micro'), cross_val_score(clf, X_train, y_train, cv=5).mean(), cross_val_score(clf, X_train, y_train, cv=5).std()]
        metrics_dataframe = pd.DataFrame(results, index=["Accuracy", "Precision", "Recall", "F1 Score", "Cross-validation mean", "Cross-validation std"], columns=[name])

        print('Classification Report: \n')
        print(classification_report(y_test,y_pred))
    
        print('Metrics:')
        print(metrics_dataframe)
    
    


## Quantum Kernel Training

It is also possible to train a quantum kernel with Quantum Kernel Alignment (QKA) that iteratively adapts a parametrized quantum kernel to a dataset and converging to the maximum SVM margin at the same time. To implement it, we prepare the dataset as usual and define the quantum feature map. Then, we will use QuantumKernelTrained.fit method to train the kernel parameters and pass it to a machine learning model.
Source: https://lab.quantum-computing.ibm.com/user/5ae8692a0f0205003930696d/lab/workspaces/auto-s/tree/qiskit-tutorials/qiskit-machine-learning/08_quantum_kernel_trainer.ipynb

In [None]:
# Loading your IBM Quantum account(s)
#from qiskit import IBMQ

# Import utilities
import numpy as np
import pandas as pd

# sklearn imports
#from sklearn import preprocessing
#from sklearn.preprocessing import LabelEncoder
#from sklearn.model_selection import train_test_split
#from sklearn import metrics
#from sklearn.model_selection import cross_val_score
#from sklearn.metrics import classification_report

# Importing standard Qiskit libraries and Qiskit Machine Learning imports
#from qiskit import BasicAer
#from qiskit.utils import QuantumInstance, algorithm_globals
#from qiskit import QuantumCircuit
#from qiskit.circuit import ParameterVector
#from qiskit.providers.aer import AerSimulator
#from qiskit.visualization import circuit_drawer
#from qiskit.algorithms.optimizers import SPSA
#from qiskit.circuit.library import ZZFeatureMap
#from qiskit_machine_learning.kernels import QuantumKernel
#from qiskit_machine_learning.kernels.algorithms import QuantumKernelTrainer
#from qiskit_machine_learning.algorithms import QSVC

# Import dataset
data = '../data/datasets/neurons.csv'
neuron = pd.read_csv(data, delimiter=';')

# Select a subset of the data composed of three classes
df = neuron.head(20).copy()                    # Ganglion
df = pd.concat([df, neuron.iloc[373:393]])     # Granule
df = pd.concat([df, neuron.iloc[17033:17053]]) # Basket

# seed for randomization, to keep outputs consistent
from qiskit.utils import algorithm_globals
seed = 123456
algorithm_globals.random_seed = seed

# Drop row having at least 1 missing value
df = df.dropna()

# Creating an instance of Labelencoder
from sklearn.preprocessing import LabelEncoder
enc = LabelEncoder()
# Assigning numerical value and storing it
df[["Target"]] = df[["Target"]].apply(enc.fit_transform)

# Divide the data, y the variable to predict (Target) and X the features
X = df[df.columns[1:]]
y = df['Target']

# Splitting the data : training and test (20%)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

# Scaling the data
from sklearn import preprocessing
Normalize = preprocessing.StandardScaler()
# Transform data
X_train = Normalize.fit_transform(X_train)
X_train = pd.DataFrame(X_train, columns = X.columns)
X_test = Normalize.fit_transform(X_test)
X_test = pd.DataFrame(X_test, columns = X.columns)

# Dimension Reduction with PCA (with two principal compoenents)
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
# transform data
X_train = pca.fit_transform(X_train)
X_test = pca.fit_transform(X_test)

# Define a new DataFrame with two column (the principal components)
component_columns = []
for x in (n+1 for n in range(2)):
    component_columns = component_columns + ['PCA_%i'%x]
X_train = pd.DataFrame(data = X_train, columns = component_columns)
X_test = pd.DataFrame(data = X_test, columns = component_columns)

# Define some parameters
feature_dimension = 2 # number of features
reps = 2 # number of repetitions
quantum_backend = 'ibmq_qasm_simulator' # quantum backend
cv = 2 # Cross-validation
circuits = 2 # number of circuits

# The use of these requires us to sign with an IBMQ account.
# Assuming the credentials are already loaded onto your computer, you sign in with
#IBMQ.save_account('YOUR API')
#IBMQ.load_account()
#provider = IBMQ.get_provider(hub='ibm-q')

# Define a callback class for our optimizer
class QKTCallback:
    """Callback wrapper class."""

    def __init__(self) -> None:
        self._data = [[] for i in range(5)]

    def callback(self, x0, x1=None, x2=None, x3=None, x4=None):
        """
        Args:
            x0: number of function evaluations
            x1: the parameters
            x2: the function value
            x3: the stepsize
            x4: whether the step was accepted
        """
        self._data[0].append(x0)
        self._data[1].append(x1)
        self._data[2].append(x2)
        self._data[3].append(x3)
        self._data[4].append(x4)

    def get_callback_data(self):
        return self._data

    def clear_callback_data(self):
        self._data = [[] for i in range(5)]
    
# seed for randomization, to keep outputs consistent
#seed = 123456
#algorithm_globals.random_seed = seed

# Qiskit imports
from qiskit import QuantumCircuit
from qiskit.circuit import ParameterVector
from qiskit.visualization import circuit_drawer
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.kernels import TrainableFidelityQuantumKernel
from qiskit_machine_learning.kernels.algorithms import QuantumKernelTrainer
        
# Create a rotational layer to train. We will rotate each qubit the same amount.
training_params = ParameterVector("θ", 1)
fm0 = QuantumCircuit(feature_dimension)
for qubit in range(feature_dimension):
    fm0.ry(training_params[0], qubit)
        
#fm0.ry(training_params[0], 0)
#fm0.ry(training_params[0], 1)

# Use ZZFeatureMap to represent input data
fm1 = ZZFeatureMap(feature_dimension=feature_dimension, reps=reps, entanglement='linear')

# Create the feature map, composed of our two circuits
fm = fm0.compose(fm1)

print(circuit_drawer(fm))
print(f"Trainable parameters: {training_params}")
    

# Rotational layer to train. We rotate each qubit the same amount.
#from qiskit.circuit import ParameterVector
#user_params = ParameterVector("θ", 1)
#fm0 = QuantumCircuit(circuits) # Number of circuits
#fm0.ry(user_params[0], 0)
#fm0.ry(user_params[0], 1)

# Use ZZFeatureMap 
#qfm_zz = ZZFeatureMap(feature_dimension)

# Create the feature map
#fm = fm0.compose(qfm_zz)

#print(circuit_drawer(fm))
#print(f"Trainable parameters: {user_params}")

#print(qfm_zz)

if quantum_backend is not None:
    # Compute code with online quantum simulators or quantum hardware from the cloud
    # Import QiskitRuntimeService and Sampler
    from qiskit_ibm_runtime import QiskitRuntimeService, Sampler
    # Define service
    service = QiskitRuntimeService(channel = 'ibm_quantum', token = ibm_account, instance = 'ibm-q-internal/deployed/default')
    # Get backend
    backend = service.backend(quantum_backend) # Use a simulator or hardware from the cloud
    # Define Sampler: With our training and testing datasets ready, we set up the FidelityQuantumKernel class to calculate a kernel matrix using the ZZFeatureMap. We use the reference implementation of the Sampler primitive and the ComputeUncompute fidelity that computes overlaps between states. These are the default values and if you don't pass a Sampler or Fidelity instance, the same objects will be created automatically for you.
    # Run Quasi-Probability calculation
    # optimization_level=3 adds dynamical decoupling
    # resilience_level=1 adds readout error mitigation
    from qiskit_ibm_runtime import Options
    options = Options()
    options.resilience_level = 1
    options.execution.shots = 1
    options.optimization_level = 3
    sampler = Sampler(session=backend, options = options)
else:
    # Compute code with local simulator (Aer simulators)
    from qiskit.primitives import Sampler
    sampler = Sampler()

# We utilize the default implementation of the Sampler primitive and the ComputeUncompute fidelity, which calculates the overlaps between states.
# If you do not provide specific instances of Sampler or Fidelity, the code will automatically create these objects with the default values.
from qiskit.algorithms.state_fidelities import ComputeUncompute
from qiskit_machine_learning.kernels import FidelityQuantumKernel
fidelity = ComputeUncompute(sampler=sampler)
    
# Instantiate quantum kernel
quant_kernel = TrainableFidelityQuantumKernel(fidelity = fidelity, feature_map=fm, training_parameters=training_params)

# Set up the optimizer
cb_qkt = QKTCallback()
spsa_opt = SPSA(maxiter=10, callback=cb_qkt.callback, learning_rate=0.05, perturbation=0.05)

# Instantiate a quantum kernel trainer
qkt = QuantumKernelTrainer(
    quantum_kernel=quant_kernel, loss="svc_loss", optimizer=spsa_opt, initial_point=[np.pi / 2]
)

# Train the kernel using QKT directly
qka_results = qkt.fit(X_train, y_train)
optimized_kernel = qka_results.quantum_kernel
print(qka_results)

# Use QSVC for classification
qsvc = QSVC(quantum_kernel=optimized_kernel)

# Fit the QSVC
qsvc.fit(X_train, y_train)

# Predict the labels
y_pred = qsvc.predict(X_test)

# Evalaute the test accuracy
accuracy_test = metrics.balanced_accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"accuracy test: {accuracy_test}")

# Print predicted values and real values of the X_test dataset
print("\n")
print("Print predicted data coming from X_test as new input data")
print(y_pred)
print("\n")
print("Print real values\n")
print(y_test)
print("\n")
    
# Print accuracy metrics of the model
results = [metrics.accuracy_score(y_test, y_pred),metrics.precision_score(y_test, y_pred, average='micro'),metrics.recall_score(y_test, y_pred, average='micro'),metrics.f1_score(y_test, y_pred, average='micro'), cross_val_score(qsvc, X_train, y_train, cv=cv).mean(), cross_val_score(qsvc, X_train, y_train, cv=cv).std()]
metrics_dataframe = pd.DataFrame(results, index=["Accuracy", "Precision", "Recall", "F1 Score", "Cross-validation mean", "Cross-validation std"], columns=['q_kernel_training'])
print('Classification Report: \n')
print(classification_report(y_test,y_pred))
        
metrics_dataframe
