In [8]:
# -*- encoding: utf-8 -*-
import os

import numpy as np
import pandas as pd
from ACME.ACME import ACME
from sklearn.metrics import f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures, StandardScaler

from prml.linear import VariationalLogisticRegression
from utils import load_lt_data


def create_images_file(_path: str) -> str:
    """Make sure the directory ``_path`` exists and return it.

    Missing parent directories are created as well. Calling the function
    for a directory that already exists is a no-op.

    Args:
        _path: Directory to create.

    Returns:
        The same ``_path`` that was passed in, unchanged.

    Raises:
        FileExistsError: If ``_path`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of
    # "if not os.path.exists(...): os.makedirs(...)".
    os.makedirs(_path, exist_ok=True)
    return _path


def create_toy_data(is_breast: bool = False,
                    is_heart: bool = False,
                    is_bone: bool = False,
                    is_kaggle_heart: bool = False,
                    _path: str = None):
    """Load one of the spreadsheets, split it, scale it and add a bias column.

    The first truthy flag, checked in the order ``is_breast``, ``is_heart``,
    ``is_bone``, ``is_kaggle_heart``, selects the dataset and a
    ``StandardScaler``. When no flag is set the "LT" dataset is loaded and a
    ``MinMaxScaler`` is used instead.

    The train/test split is made without a fixed ``random_state``, so
    successive calls generally return different splits.

    Args:
        is_breast: Select ``./breast_data/fix_breast_cancer.xlsx``.
        is_heart: Select ``./spectf_data/over_resample.xlsx``.
        is_bone: Select ``./bone_marrow_transplant_data/fix_bone_data.xlsx``.
        is_kaggle_heart: Select ``./heart_disease_data/over_resample.xlsx``.
        _path: Currently ignored; kept so existing callers keep working.

    Returns:
        A tuple ``(image_path, Xtrain, Xtest, Ytrain, Ytest, dataset,
        feature_names)`` where ``image_path`` is the (created) image
        directory for the chosen dataset, ``Xtrain``/``Xtest`` are the scaled
        feature matrices with the ``PolynomialFeatures`` bias column added,
        ``dataset`` stacks the train rows on top of the test rows with the
        target as the first column, and ``feature_names`` is a new list
        starting with ``"Bias term"``.
    """
    # (flag, image directory, data file), listed in priority order.
    known_datasets = (
        (is_breast,
         "./images/breast_data",
         "./breast_data/fix_breast_cancer.xlsx"),
        (is_heart,
         "./images/spect_data",
         "./spectf_data/over_resample.xlsx"),
        (is_bone,
         "./images/bone_marrow_transplant_data",
         "./bone_marrow_transplant_data/fix_bone_data.xlsx"),
        (is_kaggle_heart,
         "./images/heart_disease_data",
         "./heart_disease_data/over_resample.xlsx"),
    )

    for selected, image_dir, data_file in known_datasets:
        if selected:
            scaler = StandardScaler()
            break
    else:
        # No flag set: fall back to the LT data, which uses min-max scaling.
        image_dir = "./images/LT"
        data_file = './data/over_resample_all_fields_scaler.xlsx'
        scaler = MinMaxScaler()

    image_path = create_images_file(image_dir)
    LT = load_lt_data(_all=True, path=data_file)

    # Build a fresh list rather than inserting into LT.feature_names, so the
    # loaded dataset object is not modified as a side effect.
    feature_names = ["Bias term", *LT.feature_names]

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(LT.data, LT.target, test_size=.3)

    # Fit the scaler on the training part only, then reuse it for the test part.
    Xtrain = scaler.fit_transform(Xtrain)
    Xtest = scaler.transform(Xtest)

    # degree=1 with include_bias=True adds the bias column that the
    # "Bias term" entry in feature_names refers to.
    feature = PolynomialFeatures(degree=1, include_bias=True)
    Xtrain = feature.fit_transform(Xtrain)
    Xtest = feature.transform(Xtest)

    # Target goes in the first column, followed by the transformed features.
    train = np.hstack((Ytrain.reshape(-1, 1), Xtrain))
    test = np.hstack((Ytest.reshape(-1, 1), Xtest))
    dataset = np.vstack((train, test))

    return image_path, Xtrain, Xtest, Ytrain, Ytest, dataset, feature_names


flag = True

# The figures below are written to ./image_acme; create it up front so the
# export does not fail when the directory is missing.
os.makedirs("./image_acme", exist_ok=True)

# Minimum value every metric must reach before the model is explained.
_min_metric = 0.92

# Re-split and re-fit until one run reaches the threshold on all metrics.
# `flag` is never cleared; the loop is left through the `break` at the end.
while flag:
    image_path, Xtrain, Xtest, Ytrain, Ytest, dataset, feature_names = create_toy_data(is_breast=True)

    vlr = VariationalLogisticRegression()
    vlr.fit(Xtrain, Ytrain, feature_names)

    y_pred = vlr.predict(Xtest)
    _score = vlr.score(Xtest, Ytest)
    _f1_macro = f1_score(Ytest, y_pred, average='macro')
    _recall_score = recall_score(Ytest, y_pred, average='macro')
    _precision_score = precision_score(Ytest, y_pred, average='macro')

    _metrics = (_score, _f1_macro, _recall_score, _precision_score)
    if not all(_metric >= _min_metric for _metric in _metrics):
        # This split was not good enough; draw a new one.
        continue

    print(_score, _f1_macro, _recall_score, _precision_score, "\n")

    acme_vlr = ACME(vlr, target="Class", features=feature_names, task="class")

    # Column order matches create_toy_data's output: target first, then the
    # features (starting with the bias term).
    dataset = pd.DataFrame(dataset, columns=["Class"] + feature_names)

    # Summary plot for label_class=1.
    acme_vlr = acme_vlr.explain(dataset, robust=True, label_class=1)
    summary_plot_1 = acme_vlr.summary_plot()
    summary_plot_1.show()
    summary_plot_1.write_image(file='./image_acme/breast_label_1.eps', format='eps')

    # Summary plot for label_class=0.
    acme_vlr = acme_vlr.explain(dataset, robust=True, label_class=0)
    summary_plot_2 = acme_vlr.summary_plot()
    summary_plot_2.show()
    summary_plot_2.write_image(file='./image_acme/breast_label_0.eps', format='eps')

    # Bar plot, taken from the explainer after the label_class=0 run.
    bar_plot = acme_vlr.bar_plot()
    bar_plot.show()
    bar_plot.write_image(file='./image_acme/breast_bar.eps', format='eps')
    break


all positive 10 [('Clump_Thickness', 1, '1.2737 ± 0.1864'), ('Bias term', 0, '-1.2612 ± 0.1393'), ('Bare_Nuclei', 6, '1.2205 ± 0.2064'), ('Bland_Chromatin', 7, '0.879 ± 0.2344'), ('Uniformity_of_Cell_Shape', 3, '0.8489 ± 0.3491'), ('Normal_Nucleoli', 8, '0.8273 ± 0.2126'), ('Mitoses', 9, '0.7991 ± 0.1823'), ('Marginal_Adhesion', 4, '0.616 ± 0.2047'), ('Uniformity_of_Cell_Size', 2, '0.1753 ± 0.3509'), ('Single_Epithelial_Cell_Size', 5, '-0.0164 ± 0.2146')]
0.9707317073170731 0.9678884711779449 0.96 0.9779411764705883 

