# In [20]:
import colorlover as cl
import plotly.graph_objs as go
import numpy as np
from sklearn import metrics
import pandas as pd
from sklearn.manifold import TSNE
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn.svm import SVC
import numpy as np


def serve_prediction_plot(
    model, X_train, X_test, y_train, y_test, threshold, prob_list
):
    """Build a 3-D t-SNE scatter figure of the test set, coloured by confidence.

    The test features are reduced with PCA (at most 200 components) and then
    embedded into three dimensions with t-SNE. Every sample keeps its true
    label, except samples the model misclassifies: their label has its
    whitespace removed and ``"_F"`` appended, so they form separate groups.
    One ``Scatter3d`` trace is emitted per label group, with markers coloured
    by that group's own entries of ``prob_list``.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_train: Unused; kept so existing callers keep working.
        X_test: DataFrame of test features (its index is reused for the
            embedding frame).
        y_train: Unused; kept so existing callers keep working.
        y_test: True labels for ``X_test``, in the same row order. Labels of
            misclassified samples must be strings.
        threshold: Unused; kept so existing callers keep working.
        prob_list: One value per row of ``X_test``, used as marker colour.

    Returns:
        A ``plotly.graph_objects.Figure`` with one 3-D scatter trace per label
        group.
    """
    true_labels = np.asarray(y_test)
    # Positional mask, so it does not depend on X_test's index being unique.
    misclassified = np.asarray(model.predict(X_test)) != true_labels

    n_rows, n_cols = X_test.shape
    # PCA cannot produce more components than min(n_samples, n_features).
    pca = PCA(n_components=min(200, n_rows, n_cols))
    data_pca = pca.fit_transform(X_test)
    # Recent scikit-learn requires perplexity < n_samples; clamp for small sets.
    tsne = TSNE(
        n_components=3,
        verbose=True,
        perplexity=min(40.0, float(max(n_rows - 1, 1))),
    )
    embedding = tsne.fit_transform(data_pca)

    embedding_df = pd.DataFrame(
        embedding, columns=["x", "y", "z"], index=X_test.index)
    embedding_df["label"] = [
        "".join(label.split()) + "_F" if wrong else label
        for label, wrong in zip(true_labels.tolist(), misclassified)
    ]
    embedding_df["pre_pro"] = prob_list

    color_scale = ["aggrnyl", "reds", "twilight", "electric"]
    data = []
    # With two classes the groups are e.g.
    # Pullover, Pullover_F, T-Shirt, T-Shirt_F.
    for j, (label, group) in enumerate(embedding_df.groupby("label")):
        scatter = go.Scatter3d(
            name=str(label),
            x=group["x"],
            y=group["y"],
            z=group["z"],
            text=[label] * len(group),
            hoverinfo="text",
            textposition="middle center",
            showlegend=False,
            mode="markers",
            marker=dict(
                size=3,
                symbol="circle",
                # Colour each point by its own value, not the full list.
                color=group["pre_pro"],
                # Cycle the palettes so extra groups do not raise IndexError.
                colorscale=color_scale[j % len(color_scale)],
                # Shift each colour bar sideways so they do not overlap.
                colorbar=dict(title=str(label), x=-0.5 + j / 7),
            ),
        )
        data.append(scatter)

    axes = dict(title="", showgrid=True, zeroline=False, showticklabels=False)
    layout = go.Layout(
        margin=dict(l=0, r=0, b=0, t=0),
        scene=dict(xaxis=axes, yaxis=axes, zaxis=axes),
        plot_bgcolor="#124c52",
        paper_bgcolor="#124c52",
        font={"color": "#a5b1cd"},
    )

    return go.Figure(data=data, layout=layout)

def generate_train_data(
    n_samples, csv_path='fashion_mnist_data/fashion-mnist_train.csv'
):
    """Load a sampled, two-class training split from a Fashion-MNIST CSV.

    The CSV must have a ``label`` column; every other column is divided by
    255.0. Numeric labels 0-9 are replaced by their class names, ``n_samples``
    rows are drawn with ``random_state=42``, and rows belonging to the eight
    excluded classes are dropped, which leaves T-Shirt and Pullover.

    Args:
        n_samples: Number of rows to sample before the class filter, so the
            result usually has fewer rows than this.
        csv_path: Location of the CSV file. Defaults to the original
            hard-coded training file.

    Returns:
        A tuple ``(x_train, y_train)``: a DataFrame of the scaled feature
        columns and an array of the matching class names.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when ``n_samples``
            exceeds the number of rows in the file.
    """
    idx_dic = {0: 'T-Shirt',
               1: 'Trouser',
               2: 'Pullover',
               3: 'Dress',
               4: 'Coat',
               5: 'Sandal',
               6: 'Shirt',
               7: 'Sneaker',
               8: 'Bag',
               9: 'Ankle boot'}
    excluded = ['Ankle boot', 'Trouser', 'Bag', 'Coat',
                'Sandal', 'Shirt', 'Sneaker', 'Dress']

    raw = pd.read_csv(csv_path)
    labels = raw['label'].replace(idx_dic)
    # Keyword form: the positional ``axis`` argument was removed in pandas 2.0.
    df_train = raw.drop(columns='label').div(255.0)
    df_train.insert(0, 'label', labels)

    df = df_train.sample(n=n_samples, random_state=42)
    # One mask built on the sampled frame itself, instead of chained filters
    # that relied on implicit index alignment.
    kept = df.loc[~df['label'].isin(excluded)]
    x_train = kept.loc[:, kept.columns != 'label']
    y_train = kept['label'].values
    return x_train, y_train

def generate_test_data(
    n_samples, csv_path='fashion_mnist_data/fashion-mnist_test.csv'
):
    """Load a sampled, two-class test split from a Fashion-MNIST CSV.

    The CSV must have a ``label`` column; every other column is divided by
    255.0. Numeric labels 0-9 are replaced by their class names,
    ``int(n_samples / 4)`` rows are drawn with ``random_state=42``, and rows
    belonging to the eight excluded classes are dropped, which leaves T-Shirt
    and Pullover.

    Args:
        n_samples: Nominal sample size; a quarter of it is drawn before the
            class filter.
        csv_path: Location of the CSV file. Defaults to the original
            hard-coded test file.

    Returns:
        A tuple ``(x_test, y_test)``: a DataFrame of the scaled feature
        columns and an array of the matching class names.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when the requested
            number of rows exceeds the number of rows in the file.
    """
    idx_dic = {0: 'T-Shirt',
               1: 'Trouser',
               2: 'Pullover',
               3: 'Dress',
               4: 'Coat',
               5: 'Sandal',
               6: 'Shirt',
               7: 'Sneaker',
               8: 'Bag',
               9: 'Ankle boot'}
    excluded = ['Ankle boot', 'Trouser', 'Bag', 'Coat',
                'Sandal', 'Shirt', 'Sneaker', 'Dress']

    raw = pd.read_csv(csv_path)
    labels = raw['label'].replace(idx_dic)
    # Keyword form: the positional ``axis`` argument was removed in pandas 2.0.
    df_test = raw.drop(columns='label').div(255.0)
    df_test.insert(0, 'label', labels)

    df_t = df_test.sample(n=int(n_samples / 4), random_state=42)
    # One mask built on the sampled frame itself, instead of chained filters
    # that relied on implicit index alignment.
    kept = df_t.loc[~df_t['label'].isin(excluded)]
    x_test = kept.loc[:, kept.columns != 'label']
    y_test = kept['label'].values
    return x_test, y_test

# decision_function: the distance of each sample in X to the separating hyperplane
# predict_proba: the probability of each sample being predicted as each class
# Build the train/test splits from the Fashion-MNIST CSV files.
X_train, y_train = generate_train_data(n_samples=30000)
print(type(X_train))
X_test, y_test = generate_test_data(n_samples=30000)

# Fit a linear SVM with probability estimates enabled.
clf = SVC(C=10, kernel='linear', gamma=0.5, probability=True)
clf.fit(X_train, y_train)

# Decision-function values on the training set (printed for inspection only).
dec = clf.decision_function(X_train)
print(dec)
print(len(dec))

# Per-class probabilities for every test sample.
prob = clf.predict_proba(X_test)
print(prob)
print(len(prob))

# For each test sample keep the larger of its first two class probabilities.
prob_list = [max(row[0], row[1]) for row in prob]
print(len(prob_list))
print(prob_list)

# Render the 3-D embedding of the test set, coloured by that value.
prediction_figure = serve_prediction_plot(
    model=clf,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    threshold=0.5,
    prob_list=prob_list,
)
prediction_figure.show()