In [72]:
import os
import matplotlib.pyplot as plt
import pandas as pd
from qiskit import BasicAer
from qiskit.utils import QuantumInstance, algorithm_globals

import numpy as np

from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from fcmeans import FCM
from quantum_clustering import QKMeans, QFCMeans

seed = 42
algorithm_globals.random_seed = seed
quantum_instance = QuantumInstance(BasicAer.get_backend('qasm_simulator'), 
                                    shots=10240,
                                    optimization_level=1,
                                    seed_simulator=seed,
                                    seed_transpiler=seed)

In [73]:
df = pd.read_csv("leukemia_GSE9476.csv")
df.head()

Unnamed: 0,samples,type,1007_s_at,1053_at,117_at,121_at,1255_g_at,1294_at,1316_at,1320_at,...,AFFX-r2-Hs28SrRNA-5_at,AFFX-r2-Hs28SrRNA-M_at,AFFX-r2-P1-cre-3_at,AFFX-r2-P1-cre-5_at,AFFX-ThrX-3_at,AFFX-ThrX-5_at,AFFX-ThrX-M_at,AFFX-TrpnX-3_at,AFFX-TrpnX-5_at,AFFX-TrpnX-M_at
0,1,Bone_Marrow_CD34,7.745245,7.81121,6.477916,8.841506,4.546941,7.957714,5.344999,4.673364,...,5.058849,6.810004,12.80006,12.718612,5.391512,4.666166,3.974759,3.656693,4.160622,4.139249
1,12,Bone_Marrow_CD34,8.087252,7.240673,8.584648,8.983571,4.548934,8.011652,5.579647,4.828184,...,4.436153,6.751471,12.472706,12.333593,5.379738,4.656786,4.188348,3.792535,4.204414,4.1227
2,13,Bone_Marrow_CD34,7.792056,7.549368,11.053504,8.909703,4.549328,8.237099,5.406489,4.615572,...,4.392061,6.086295,12.637384,12.499038,5.316604,4.600566,3.845561,3.635715,4.174199,4.067152
3,14,Bone_Marrow_CD34,7.767265,7.09446,11.816433,8.994654,4.697018,8.283412,5.582195,4.903684,...,4.633334,6.375991,12.90363,12.871454,5.179951,4.641952,3.991634,3.704587,4.149938,3.91015
4,15,Bone_Marrow_CD34,8.010117,7.405281,6.656049,9.050682,4.514986,8.377046,5.493713,4.860754,...,5.305192,6.700453,12.949352,12.782515,5.341689,4.560315,3.88702,3.629853,4.127513,4.004316


In [74]:
# Split training and test data
y = np.asarray(df.type)
X = np.asarray(df.iloc[:, 2:])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)

In [75]:
# fit K-means
kmeans = KMeans(n_clusters=5)
kmeans.fit(X_train)
kmeans_labels = kmeans.labels_

# fit fuzzy C-means
fcmeans = FCM(n_clusters=5)
fcmeans.fit(X_train)
fcmeans_labels = fcmeans.predict(X_train)

# fit quantum K-means
qkmeans = QKMeans(n_clusters=5,
                    quantum_instance=quantum_instance
)
qkmeans.fit(X_train)
qkmeans_labels = qkmeans.labels_

# fit quantum fuzzy C-means
qfcmeans = QFCMeans(n_clusters=5,
                    quantum_instance=quantum_instance
)
qfcmeans.fit(X_train)
qfcm_labels = (qfcmeans.labels_)

In [76]:
# valiDATE
kmeans_predictions = kmeans.predict(X_test)
fcmeans_predictions = fcmeans.predict(X_test)
qkmeans_predictions = qkmeans.predict(X_test)
qfcmeans_predictions = qfcmeans.predict(X_test)

In [77]:
data = {
    "original_labels": y_train, 
    "kmeans_labels": kmeans_labels,
    "fcmeans_labels": fcmeans_labels, 
    "qkmeans_labels": qkmeans_labels,
    "qfcm_labels": qfcm_labels
}
df = pd.DataFrame(data=data)
df.head(20)

Unnamed: 0,original_labels,kmeans_labels,fcmeans_labels,qkmeans_labels,qfcm_labels
0,Bone_Marrow,4,1,3,0
1,AML,1,3,3,4
2,PB,2,2,3,0
3,Bone_Marrow_CD34,3,3,2,1
4,Bone_Marrow,4,1,0,2
5,PB,2,2,1,4
6,Bone_Marrow,4,1,0,2
7,Bone_Marrow_CD34,1,3,0,2
8,AML,1,3,3,0
9,Bone_Marrow_CD34,3,3,4,3


In [45]:
df.original_labels.unique()

array(['Bone_Marrow', 'AML', 'PB', 'Bone_Marrow_CD34', 'PBSC_CD34'],
      dtype=object)

In [102]:
# get labels values
labels = 'qfcm_labels'
pd.DataFrame(df[['original_labels', labels]].groupby(['original_labels', labels])[labels].count())

Unnamed: 0_level_0,Unnamed: 1_level_0,qfcm_labels
original_labels,qfcm_labels,Unnamed: 2_level_1
AML,0,7
AML,1,4
AML,2,4
AML,3,1
AML,4,7
Bone_Marrow,0,4
Bone_Marrow,1,1
Bone_Marrow,2,2
Bone_Marrow_CD34,1,1
Bone_Marrow_CD34,2,2


In [103]:
qfcmeans_label_code_map = {
    0: "PB",
    1: "Bone_Marrow",
    2: "PBSC_CD34",
    3: "Bone_Marrow_CD34",
    4: "AML",
}

qkmeans_label_code_map = {
    0: "PBSC_CD34",
    1: "PB",
    2: "Bone_Marrow",
    3: "AML",
    4: "Bone_Marrow_CD34",
}

fcmeans_label_code_map = {
    0: "Bone_Marrow_CD34",
    1: "Bone_Marrow",
    2: "PB",
    3: "AML",
    4: "PBSC_CD34",
}

kmeans_label_code_map = {
    0: "AML",
    1: "Bone_Marrow_CD34",
    2: "PB",
    3: "PBSC_CD34",
    4: "Bone_Marrow",
}

In [104]:
df_train = df.copy()
df_train['kmeans_labels'] = df_train['kmeans_labels'].map(kmeans_label_code_map)
df_train['fcmeans_labels'] = df_train['fcmeans_labels'].map(fcmeans_label_code_map)
df_train['qkmeans_labels'] = df_train['qkmeans_labels'].map(qkmeans_label_code_map)
df_train['qfcm_labels'] = df_train['qfcm_labels'].map(qfcmeans_label_code_map)

In [106]:
df_train.columns

Index(['original_labels', 'kmeans_labels', 'fcmeans_labels', 'qkmeans_labels',
       'qfcm_labels'],
      dtype='object')

In [111]:
# K-means
print('K-means: ', accuracy_score(df_train.original_labels, df_train.kmeans_labels))

# fuzzy C-means
print('fuzzy C-means: ', accuracy_score(df_train.original_labels, df_train.fcmeans_labels))

# quantum K-means
print('quantum fuzzy C-means: ', accuracy_score(df_train.original_labels, df_train.qkmeans_labels))

# quantum fuzzy C-means
print('quantum fuzzy C-means: ', accuracy_score(df_train.original_labels, df_train.qfcm_labels))

K-means:  0.5686274509803921
fuzzy C-means:  0.5686274509803921
quantum fuzzy C-means:  0.37254901960784315
quantum fuzzy C-means:  0.39215686274509803


In [112]:
df_train.original_labels.unique()

array(['Bone_Marrow', 'AML', 'PB', 'Bone_Marrow_CD34', 'PBSC_CD34'],
      dtype=object)

In [113]:
print('K-means')
print(classification_report(
    df_train.original_labels, 
    df_train.kmeans_labels, 
    labels=['Bone_Marrow', 'AML', 'PB', 'Bone_Marrow_CD34', 'PBSC_CD34'])
)

K-means
                  precision    recall  f1-score   support

     Bone_Marrow       1.00      1.00      1.00         7
             AML       1.00      0.26      0.41        23
              PB       1.00      1.00      1.00         8
Bone_Marrow_CD34       0.08      0.17      0.11         6
       PBSC_CD34       0.41      1.00      0.58         7

        accuracy                           0.57        51
       macro avg       0.70      0.69      0.62        51
    weighted avg       0.81      0.57      0.57        51



In [114]:
print('fuzzy C-means')
print(classification_report(
    df_train.original_labels, 
    df_train.fcmeans_labels, 
    labels=['Bone_Marrow', 'AML', 'PB', 'Bone_Marrow_CD34', 'PBSC_CD34'])
)

fuzzy C-means
                  precision    recall  f1-score   support

     Bone_Marrow       0.50      1.00      0.67         7
             AML       0.52      0.61      0.56        23
              PB       0.80      1.00      0.89         8
Bone_Marrow_CD34       0.00      0.00      0.00         6
       PBSC_CD34       0.00      0.00      0.00         7

        accuracy                           0.57        51
       macro avg       0.36      0.52      0.42        51
    weighted avg       0.43      0.57      0.48        51



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [115]:
print('quantum K-means')
print(classification_report(
    df_train.original_labels, 
    df_train.qkmeans_labels, 
    labels=['Bone_Marrow', 'AML', 'PB', 'Bone_Marrow_CD34', 'PBSC_CD34'])
)

quantum K-means
                  precision    recall  f1-score   support

     Bone_Marrow       0.12      0.14      0.13         7
             AML       0.47      0.35      0.40        23
              PB       0.25      0.25      0.25         8
Bone_Marrow_CD34       0.80      0.67      0.73         6
       PBSC_CD34       0.31      0.57      0.40         7

        accuracy                           0.37        51
       macro avg       0.39      0.40      0.38        51
    weighted avg       0.40      0.37      0.38        51



In [116]:
print('quantum fuzzy C-means')
print(classification_report(
    df_train.original_labels, 
    df_train.qfcm_labels, 
    labels=['Bone_Marrow', 'AML', 'PB', 'Bone_Marrow_CD34', 'PBSC_CD34'])
)

quantum fuzzy C-means
                  precision    recall  f1-score   support

     Bone_Marrow       0.12      0.14      0.13         7
             AML       0.78      0.30      0.44        23
              PB       0.32      0.75      0.44         8
Bone_Marrow_CD34       0.75      0.50      0.60         6
       PBSC_CD34       0.27      0.43      0.33         7

        accuracy                           0.39        51
       macro avg       0.45      0.43      0.39        51
    weighted avg       0.54      0.39      0.40        51



In [118]:
data = {
    "original_labels": y_test, 
    "kmeans_labels": kmeans_predictions,
    "fcmeans_labels": fcmeans_predictions, 
    "qkmeans_labels": qkmeans_predictions,
    "qfcm_labels": qfcmeans_predictions
}
df = pd.DataFrame(data=data)

In [119]:
df_test = df.copy()
df_test['kmeans_labels'] = df_test['kmeans_labels'].map(kmeans_label_code_map)
df_test['fcmeans_labels'] = df_test['fcmeans_labels'].map(fcmeans_label_code_map)
df_test['qkmeans_labels'] = df_test['qkmeans_labels'].map(qkmeans_label_code_map)
df_test['qfcm_labels'] = df_test['qfcm_labels'].map(qfcmeans_label_code_map)

In [122]:
# K-means
print('K-means: ', accuracy_score(df_test.original_labels, df_test.kmeans_labels))

# fuzzy C-means
print('fuzzy C-means: ', accuracy_score(df_test.original_labels, df_test.fcmeans_labels))

# quantum K-means
print('quantum fuzzy C-means: ', accuracy_score(df_test.original_labels, df_test.qkmeans_labels))

# quantum fuzzy C-means
print('quantum fuzzy C-means: ', accuracy_score(df_test.original_labels, df_test.qfcm_labels))

K-means:  0.7692307692307693
fuzzy C-means:  0.46153846153846156
quantum fuzzy C-means:  0.3076923076923077
quantum fuzzy C-means:  0.38461538461538464
