# q_kernel_zz

In [2]:
# Loading your IBM Quantum account(s)
from qiskit import IBMQ

# Import utilities
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from functools import reduce

# sklearn imports
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report

# Importing standard Qiskit libraries and Qiskit Machine Learning imports
from qiskit import Aer, QuantumCircuit, BasicAer
from qiskit.circuit.library import ZZFeatureMap, PauliFeatureMap, RealAmplitudes
from qiskit.utils import QuantumInstance, algorithm_globals
from qiskit_machine_learning.algorithms import QSVC, PegasosQSVC
from qiskit_machine_learning.kernels import QuantumKernel
from qiskit_machine_learning.kernels.algorithms import QuantumKernelTrainer
from qiskit_machine_learning.datasets import ad_hoc_data
from qiskit.algorithms.optimizers import COBYLA, L_BFGS_B
from qiskit import QuantumCircuit
from qiskit.circuit import ParameterVector
from qiskit.providers.aer import AerSimulator
from qiskit.visualization import circuit_drawer
from qiskit.algorithms.optimizers import SPSA
from qiskit.circuit import Parameter
from typing import Union
from qiskit_machine_learning.exceptions import QiskitMachineLearningError

# Seed for randomization, to keep outputs consistent
seed = 123456
algorithm_globals.random_seed = seed

# Define parameters
cv = 5 # Number of cross-validation folds
feature_dimension = 5 # Features dimension
k_features = 5 # Number of features kept by feature selection
reps = 2 # Feature-map repetitions

# SECURITY: an API token must never be committed inside the notebook.
# The token is read from the IBMQ_API_TOKEN environment variable instead
# (None when the variable is not set). The token that used to be hardcoded
# on this line has been exposed and should be revoked and regenerated.
import os
ibm_account = os.environ.get("IBMQ_API_TOKEN")

quantum_backend = 'ibmq_kolkata'
multiclass = None
output_folder = None

# Import dataset
data = 'neurons.csv'
neuron = pd.read_csv(data, delimiter=';')

# Positional (start, stop) row ranges of the CSV kept for each class.
# The rows between two consecutive ranges are deliberately skipped.
class_row_ranges = [
    (0, 371),         # Ganglion
    (373, 1410),      # Granule
    (1411, 2272),     # Medium Spiny
    (2273, 2797),     # Parachromaffin
    (2840, 3294),     # Purkinje
    (3295, 17032),    # Pyramidal

    (17033, 17505),   # Basket
    (17506, 17572),   # Bitufted
    (17573, 17598),   # Chandelier
    (17599, 17648),   # Double bouquet
    (17649, 17785),   # Martinotti
    (17786, 19829),   # Nitrergic

    (19830, 21436),   # Astrocytes
    (21437, 27882),   # Microglia
]
# Build the working frame with a single concat instead of growing it step by step
df = pd.concat([neuron.iloc[start:stop] for start, stop in class_row_ranges])

# Drop rows having at least one missing value
df = df.dropna()

# Encode the class labels of the Target column as integers
enc = LabelEncoder()
df["Target"] = enc.fit_transform(df["Target"])

# Divide the data: y is the variable to predict (Target), X the features
# (every column after the first one)
X = df[df.columns[1:]]
y = df['Target']

# Splitting the data : training and test (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

# Scaling the data
from sklearn.preprocessing import QuantileTransformer
# random_state makes any internal subsampling of the quantile estimation reproducible
Normalize = QuantileTransformer(n_quantiles=1000, output_distribution="uniform", random_state=seed)
# Fit the scaler on the training data only ...
X_train = Normalize.fit_transform(X_train)
X_train = pd.DataFrame(X_train, columns = X.columns)
# ... and apply the SAME fitted mapping to the test data. Re-fitting on the
# test set would scale train and test differently and leak test statistics.
X_test = Normalize.transform(X_test)
X_test = pd.DataFrame(X_test, columns = X.columns)

print(X_train)

       Soma_Surface   N_stems    N_bifs  N_branch    N_tips     Width  \
0          0.838617  0.597598  0.720220  0.718218  0.715215  0.606779   
1          0.579659  0.597598  0.536036  0.537037  0.537037  0.495207   
2          0.595703  0.456456  0.629129  0.619620  0.609109  0.904932   
3          0.388786  0.456456  0.049049  0.078078  0.100100  0.031450   
4          0.900423  0.597598  0.865866  0.865866  0.867367  0.951721   
...             ...       ...       ...       ...       ...       ...   
22251      0.731001  0.894394  0.424424  0.461962  0.496997  0.582912   
22252      0.000000  0.000000  0.250751  0.216216  0.176176  0.377306   
22253      0.356055  0.894394  0.375876  0.415916  0.452953  0.144144   
22254      0.226288  0.158158  0.981730  0.981315  0.980781  0.270687   
22255      0.548778  0.597598  0.424424  0.427427  0.431431  0.910963   

         Height     Depth      Type  Diameter  ...  Bif_ampl_remote  \
0      0.908276  0.898557  0.901879  0.900733  ...  

In [3]:
from sklearn.ensemble import GradientBoostingClassifier

def embedded_xgboost_classification(X, y, k_features, output_folder = None, random_state = None):
    '''
    Select the k most important features according to a gradient-boosting classifier.

    Despite its name, this function fits scikit-learn's GradientBoostingClassifier
    (not the XGBoost library) and ranks the features by `feature_importances_`.

    Inputs:
        - X (features) DataFrame
        - y (target) DataFrame
        - k_features: number of top-ranked features to keep. If it exceeds the
          number of columns of X, all columns are kept.
        - output_folder: optional path prefix. When given, the importances are
          written to a CSV file and a bar plot is saved as PNG. The file names
          are appended to this string as-is, so it must end with a path separator.
        - random_state: optional seed forwarded to the classifier so that the
          ranking is reproducible (None keeps the unseeded behaviour).

    Output:
        - DataFrame holding only the k selected columns of X, ordered from the
          most to the least important.
    '''

    print("\n")
    print("XGBoost Classification Features Importance: started")
    print("\n")

    # define and fit the model
    model = GradientBoostingClassifier(random_state=random_state)
    model.fit(X, y)

    # one row per feature: its (stringified) name and its importance
    features_importance = pd.DataFrame({
        "Features": [f"{i}" for i in X.columns],
        "Importances": model.feature_importances_,
    })

    # Print features importance
    print("\n")
    print("Features Importances:")
    print("\n")
    print(features_importance)

    if output_folder is not None:
        features_importance.to_csv(output_folder+'XGBoost_Classification_Features_Importance.csv', index=False)

        # plot feature importance (matplotlib.pyplot is imported as `plt` in this file)
        features_importance.plot(kind='bar',x='Features',y='Importances')
        plt.title('XGBoost Classification Features Importance')
        plt.tight_layout()
        plt.savefig(output_folder+'XGBoost_Classification_Features_Importance.png')

    # Rank the features by decreasing absolute importance and keep the k best names
    ranked = features_importance.sort_values(by='Importances', key=abs, ascending=False)
    features_columns = ranked['Features'].head(k_features).tolist()

    # Create a new DataFrame with selected features
    df_data = pd.DataFrame(data = X, columns = features_columns)

    print("\n")
    print("XGBoost Classification Features Importance: DataFrame")
    print("\n")
    print(df_data)

    return df_data

# Reduce the training set to its k most important features, then restrict
# the test set to exactly the same columns, in the same order.
X_train = embedded_xgboost_classification(X_train, y_train, k_features)
selected_columns = X_train.columns
X_test = X_test.reindex(columns=selected_columns)

print(X_train)



XGBoost Classification Features Importance: started




Features Importances:


                 Features  Importances
0            Soma_Surface     0.045313
1                 N_stems     0.017606
2                  N_bifs     0.000028
3                N_branch     0.000055
4                  N_tips     0.003328
5                   Width     0.058602
6                  Height     0.357591
7                   Depth     0.085632
8                    Type     0.101204
9                Diameter     0.003870
10           Diameter_pow     0.008358
11                 Length     0.007395
12                Surface     0.005140
13            SectionArea     0.002873
14                 Volume     0.013695
15            EucDistance     0.012022
16           PathDistance     0.004510
17           Branch_Order     0.000721
18        Terminal_degree     0.000768
19        TerminalSegment     0.006249
20                Taper_1     0.008681
21                Taper_2     0.019154
22      Branch_pathle

In [4]:
def q_kernel_zz(X, X_train, X_test, y, y_train, y_test, cv, feature_dimension = None, reps= None, ibm_account = None, quantum_backend = None, multiclass = None, output_folder = None):
    '''
    Train and evaluate a QSVC whose kernel is built from a ZZFeatureMap and
    executed on an IBM Quantum backend.

    Inputs:
        - X, y: full features / target. Accepted for interface compatibility; not used here.
        - X_train, X_test: training / test features (DataFrame or array-like)
        - y_train, y_test: training / test labels (Series or array-like)
        - cv: number of folds of the K-fold cross-validation run on the training set
        - feature_dimension: number of qubits/features of the feature map.
          When None, the number of columns of X_train is used.
        - reps: repetitions of the feature map. When None, 2 is used.
        - ibm_account: IBM Quantum API token. When None, credentials already
          stored on this machine are loaded instead of saving a new token.
        - quantum_backend: name of the backend fetched from the provider
        - multiclass: the fitted model is saved only when this is None (and output_folder is given)
        - output_folder: optional path prefix for the saved model. The file name
          is appended to this string as-is.

    Output:
        - One-column DataFrame ('q_kernel_zz') with accuracy, micro-averaged
          precision / recall / F1 on the test set, and the mean and (population)
          standard deviation of the cross-validation scores.

    NOTE(review): every fit/score/predict call evaluates kernel entries on the
    backend, and kernel methods need pairwise evaluations between samples, so
    the cost grows quickly with the size of X_train. Consider subsampling
    before calling this on real hardware.
    '''
    from sklearn.model_selection import KFold

    # Work on plain numpy arrays. This matters for y_train: the K-fold indices
    # below are positional, while a pandas Series would look them up by label.
    X_train = np.asarray(X_train)
    X_test = np.asarray(X_test)
    y_train = np.asarray(y_train)

    # Fall back to sensible values instead of passing None to the feature map
    if feature_dimension is None:
        feature_dimension = X_train.shape[1]
    if reps is None:
        reps = 2

    # seed for randomization, to keep outputs consistent
    seed = 123456
    algorithm_globals.random_seed = seed

    # Quantum feature mapping with full entanglement between the qubits
    qfm_zz = ZZFeatureMap(feature_dimension=feature_dimension, reps=reps, entanglement="full")

    print(qfm_zz)

    # Use of a real quantum computer
    # This requires signing in with an IBMQ account. A token is stored only
    # when one is provided; otherwise the credentials already saved on this
    # machine are loaded.
    if ibm_account is not None:
        IBMQ.save_account(ibm_account, overwrite=True)
    IBMQ.load_account()
    provider = IBMQ.get_provider(hub='ibm-q-internal', group='deployed', project='default')
    # What additional backends we have available.
    for backend in provider.backends():
        print(backend)

    backend = provider.get_backend(quantum_backend)
    real_qcomp_backend = QuantumInstance(backend, shots=1024)
    Q_Kernel_zz = QuantumKernel(feature_map=qfm_zz, quantum_instance=real_qcomp_backend)

    model = QSVC(quantum_kernel=Q_Kernel_zz)

    model.fit(X_train,y_train)

    # Predict once and derive the accuracy from the predictions, rather than
    # calling score() and predict() separately (two kernel evaluations on the
    # backend for the same test data).
    y_pred = model.predict(X_test)
    score = metrics.accuracy_score(y_test, y_pred)
    print(f'Callable kernel classification test score for q_kernel_zz: {score}')

    if output_folder is not None:
        if multiclass is None:
            model.save(output_folder+"q_kernel_zz.model")

    print("\n")
    print("Print predicted data coming from X_test as new input data")
    print(y_pred)
    print("\n")
    print("Print real values\n")
    print(y_test)
    print("\n")

    # K-Fold Cross Validation on the training set (the same model object is re-fitted per fold)
    k_fold = KFold(n_splits=cv)
    cv_scores = np.zeros(cv)
    for fold, (indices_train, indices_test) in enumerate(k_fold.split(X_train)):
        # fit classifier to the training part of the fold
        model.fit(X_train[indices_train], y_train[indices_train])
        # score classifier on the held-out part of the fold
        cv_scores[fold] = model.score(X_train[indices_test], y_train[indices_test])

    print("cross validation scores: ", cv_scores)
    cross_mean = cv_scores.mean()
    cross_std = cv_scores.std()  # population standard deviation (divides by the number of folds)
    print("cross validation mean: ", cross_mean)

    results = [
        metrics.accuracy_score(y_test, y_pred),
        metrics.precision_score(y_test, y_pred, average='micro'),
        metrics.recall_score(y_test, y_pred, average='micro'),
        metrics.f1_score(y_test, y_pred, average='micro'),
        cross_mean,
        cross_std,
    ]

    metrics_dataframe = pd.DataFrame(results, index=["Accuracy", "Precision", "Recall", "F1 Score", "Cross-validation mean", "Cross-validation std"], columns=['q_kernel_zz'])

    print('Classification Report: \n')
    print(classification_report(y_test,y_pred))

    return metrics_dataframe

# The feature map needs as many dimensions as there are selected features
_, feature_dimension = X_train.shape
multiclass = output_folder = None

# Run the quantum-kernel classification and show the resulting metrics table
df_results = q_kernel_zz(
    X, X_train, X_test,
    y, y_train, y_test,
    cv,
    feature_dimension=feature_dimension,
    reps=reps,
    ibm_account=ibm_account,
    quantum_backend=quantum_backend,
    multiclass=multiclass,
    output_folder=output_folder,
)
print(df_results)

     ┌─────────────────────────────────────────┐
q_0: ┤0                                        ├
     │                                         │
q_1: ┤1                                        ├
     │                                         │
q_2: ┤2 ZZFeatureMap(x[0],x[1],x[2],x[3],x[4]) ├
     │                                         │
q_3: ┤3                                        ├
     │                                         │
q_4: ┤4                                        ├
     └─────────────────────────────────────────┘
ibmq_qasm_simulator
ibmq_montreal
ibmq_toronto
ibmq_kolkata
ibmq_mumbai
ibmq_lima
ibmq_belem
ibmq_quito
ibmq_guadalupe
ibmq_jakarta
ibmq_manila
ibm_hanoi
ibm_lagos
ibm_nairobi
ibm_cairo
ibm_auckland
ibm_perth
ibm_washington
ibm_oslo
ibm_geneva


KeyboardInterrupt: 