In [1]:
import pandas as pd
from dataload import load_diabetes_data

In [2]:
from models.clpl import CLPL
from models.cts_clpl import CTSCLPL
from models.cts_bi_clpl import CTSBICLPL
from models.dy_cts_clpl import DYCTSCLPL
from models.dy_ccts_clpl import DYCCTSCLPL
from models.ccts_clpl import CCTSCLPL
from models.dy_clpl import DYCLPL
from models.npl import confi

In [3]:
from sklearn.metrics import (accuracy_score,
                             precision_score,
                             recall_score,
                             f1_score,
                             roc_auc_score,
                             confusion_matrix)

from sklearn.ensemble import RandomForestClassifier

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import VotingClassifier

In [4]:
import warnings
# Install a global "ignore" filter with no category/module restriction, so every
# warning raised after this cell is hidden. NOTE(review): this also hides
# convergence and deprecation warnings from the models below; narrow it if needed.
warnings.filterwarnings("ignore")

## Test

In [5]:
def _score_on_test_split(clf, x_test, y_test):
    """Score an already-fitted classifier on the test split.

    Calls ``clf.predict`` and ``clf.predict_proba`` once each and returns a dict
    keyed by the metric column names used in the results table.
    """
    y_preds = clf.predict(x_test)
    y_probs = clf.predict_proba(x_test)
    return {
        "Accuracy Score": accuracy_score(y_test, y_preds),
        "Precision Score": precision_score(y_test, y_preds),
        "Recall Score": recall_score(y_test, y_preds),
        "f1 Score": f1_score(y_test, y_preds),
        # AUC is computed from column index 1 of the predict_proba output.
        "AUC Score": roc_auc_score(y_test, y_probs[:, 1]),
        "Confusion Matrix": confusion_matrix(y_test, y_preds),
    }


def binary_cls(label_rate=0.1, thrate=12, iters=5):
    """Fit and score every benchmark model once on the diabetes data.

    Four groups are evaluated in this order:

    1. supervised baselines, fitted on ``(x_train, y_train)``;
    2. ``confi`` models, fitted on ``(x_lab, y_lab)`` (they receive
       ``x_unlabel`` at construction time);
    3. CLPL / CTSCLPL wrapped around a random forest, fitted on
       ``(x_lab, y_lab, x_unlabel)``;
    4. the CLPL-family variants, mostly wrapped around the soft-voting ensemble,
       fitted on ``(x_lab, y_lab, x_unlabel)``.

    Parameters
    ----------
    label_rate : float
        Forwarded to ``load_diabetes_data`` as ``label_data_rate``.
    thrate : int
        Forwarded to every CLPL-family model as ``th_rate``.
    iters : int
        Forwarded to every CLPL-family model as ``iter``.

    Returns
    -------
    tuple
        ``(scls, sslcls, results)``: the list of fitted supervised models, the
        list of fitted group-3 models, and a DataFrame with one row per model in
        evaluation order (unsorted). Its columns are "Accuracy Score",
        "Precision Score", "Recall Score", "f1 Score", "AUC Score",
        "Confusion Matrix" and "Algos", in that order.
    """
    x_lab, y_lab, x_unlabel, x_test, y_test, x_train, y_train = load_diabetes_data(
        label_data_rate=label_rate
    )

    random_state = 5

    def new_forest():
        # Every use gets its own forest, as in the original per-model construction.
        return RandomForestClassifier(random_state=random_state)

    rf = new_forest()
    xg = XGBClassifier(random_state=random_state)
    lgb = LGBMClassifier(random_state=random_state)
    voters = [('rf', rf), ('xg', xg), ('lgb', lgb)]

    # NOTE(review): this single unfitted ensemble instance is handed to several
    # wrappers below; whether each wrapper clones it before fitting is not
    # visible from this notebook - confirm in the models package.
    ee = VotingClassifier(estimators=list(voters), voting='soft', weights=[1, 1, 1])

    # Each model is declared next to its display name so that rows of the
    # results table cannot drift out of sync with the models that produced them.
    supervised = [
        ("SVC", SVC(random_state=random_state, probability=True)),
        ("RandomForest", new_forest()),
        ("KNeighbours", KNeighborsClassifier()),
        ("LogisticRegression", LogisticRegression(random_state=random_state)),
        ("XGBoost", XGBClassifier(random_state=random_state)),
        ("LightGBM", LGBMClassifier(random_state=random_state)),
        ("ve", VotingClassifier(estimators=list(voters), voting='soft', weights=[1, 1, 1])),
    ]

    npl_models = [
        ("venpl", confi(ee, x_unlabel, sample_rate=0.2, verbose=True)),
        ("npl", confi(new_forest(), x_unlabel, sample_rate=0.2, verbose=True)),
    ]

    base_kw = dict(th_rate=thrate, iter=iters)
    seeded_kw = dict(base_kw, random_seed=4)
    dynamic_kw = dict(seeded_kw, factor=2, std_bound=0.12)

    ssl_models = [
        ("clpl", CLPL(model=new_forest(), **base_kw)),
        ("ctsclpl", CTSCLPL(model=new_forest(), **base_kw)),
    ]

    ve_models = [
        ("veclpl", CLPL(model=ee, **base_kw)),
        ("vectsclpl", CTSCLPL(model=ee, **seeded_kw)),
        ("vectsbiclpl", CTSBICLPL(model=ee, start_th=95, **seeded_kw)),
        ("dyvectsclpl", DYCTSCLPL(model=ee, **dynamic_kw)),
        ("dyvecctsclpl", DYCCTSCLPL(model=ee, **dynamic_kw)),
        ("dycctsclpl", DYCCTSCLPL(model=new_forest(), **dynamic_kw)),
        ("vecctsclpl", CCTSCLPL(model=ee, **seeded_kw)),
        ("dyveclpl", DYCLPL(model=ee, **dynamic_kw)),
    ]

    # (models, positional arguments for .fit) in the order they are evaluated.
    fit_plan = (
        (supervised, (x_train, y_train)),
        (npl_models, (x_lab, y_lab)),
        (ssl_models, (x_lab, y_lab, x_unlabel)),
        (ve_models, (x_lab, y_lab, x_unlabel)),
    )

    # Column order matters: ran_train addresses the six metric columns by position.
    table = {
        "Accuracy Score": [],
        "Precision Score": [],
        "Recall Score": [],
        "f1 Score": [],
        "AUC Score": [],
        "Confusion Matrix": [],
    }
    algo_names = []

    for group, fit_args in fit_plan:
        for name, clf in group:
            clf.fit(*fit_args)
            for column, value in _score_on_test_split(clf, x_test, y_test).items():
                table[column].append(value)
            algo_names.append(name)

    table["Algos"] = algo_names
    results = pd.DataFrame(table)

    scls = [model for _, model in supervised]
    sslcls = [model for _, model in ssl_models]

    return scls, sslcls, results

In [6]:
def ran_train(epoch = 20, lr = 0.1, thr = 12, itr = 5):
    """Repeat ``binary_cls`` ``epoch`` times and return the averaged results.

    Parameters
    ----------
    epoch : int
        Number of ``binary_cls`` runs to average over; must be at least 1.
    lr : float
        Passed to ``binary_cls`` as ``label_rate``.
    thr : int
        Passed to ``binary_cls`` as ``thrate``.
    itr : int
        Passed to ``binary_cls`` as ``iters``.

    Returns
    -------
    pandas.DataFrame
        The per-model table with its first six columns averaged over all runs,
        sorted by "AUC Score", then "f1 Score", then "Accuracy Score"
        (descending), with a fresh integer index.

    Raises
    ------
    ValueError
        If ``epoch`` is smaller than 1.
    """
    if epoch < 1:
        raise ValueError("epoch must be at least 1")

    # The first six columns are the metrics (including the confusion matrix);
    # the remaining column holds the algorithm name and must not be summed.
    metric_cols = slice(0, 6)

    _, _, total_results = binary_cls(label_rate = lr, thrate = thr, iters = itr)

    for _run in range(epoch - 1):
        # Every repetition must use the same thr/itr as the first run; the
        # original dropped them here and silently fell back to the defaults.
        _, _, results = binary_cls(label_rate = lr, thrate = thr, iters = itr)
        total_results.iloc[:, metric_cols] = (
            total_results.iloc[:, metric_cols] + results.iloc[:, metric_cols]
        )

    total_results.iloc[:, metric_cols] = total_results.iloc[:, metric_cols] / epoch

    total_results = (
        total_results
        .sort_values(by = ['AUC Score', 'f1 Score', 'Accuracy Score'], ascending = False)
        .reset_index(drop = True)
    )

    return total_results

In [7]:
# Average the benchmark over 50 runs (label rate 0.1, th_rate 12, 5 iterations);
# the returned table is displayed as the cell output.
ran_train(epoch = 50, lr = 0.1, thr = 12, itr = 5)

iter: 0 - threshold: 0.97 - percentile: 88
iter: 1 - threshold: 0.9276 - percentile: 76
iter: 2 - threshold: 0.87 - percentile: 64
iter: 3 - threshold: 0.79 - percentile: 52
iter: 4 - threshold: 0.74 - percentile: 40
iter: 0 - threshold: 0.97 - percentile: 88
iter: 1 - threshold: 0.94 - percentile: 76
iter: 2 - threshold: 0.91 - percentile: 64
iter: 3 - threshold: 0.8428 - percentile: 52
iter: 4 - threshold: 0.79 - percentile: 40
iter: 0 - threshold: 0.983509528684928 - percentile: 88
iter: 1 - threshold: 0.9702372661094293 - percentile: 76
iter: 2 - threshold: 0.9498426964388557 - percentile: 64
iter: 3 - threshold: 0.9196788519543244 - percentile: 52
iter: 4 - threshold: 0.8884667108632933 - percentile: 40
iter: 0 - threshold: 0.9862546069921048 - percentile: 88
iter: 1 - threshold: 0.9756883765569007 - percentile: 76
iter: 2 - threshold: 0.9627305314313208 - percentile: 64
iter: 3 - threshold: 0.945777862928207 - percentile: 52
iter: 4 - threshold: 0.9138404147929041 - percentile: 4

Unnamed: 0,Accuracy Score,Precision Score,Recall Score,f1 Score,AUC Score,Confusion Matrix,Algos
0,0.864583,0.80597,0.80597,0.80597,0.956657,"[[112.0, 13.0], [13.0, 54.0]]",ve
1,0.864583,0.797101,0.820896,0.808824,0.954627,"[[111.0, 14.0], [12.0, 55.0]]",RandomForest
2,0.859375,0.794118,0.80597,0.8,0.954507,"[[111.0, 14.0], [13.0, 54.0]]",LightGBM
3,0.875,0.820896,0.820896,0.820896,0.953194,"[[113.0, 12.0], [12.0, 55.0]]",XGBoost
4,0.833333,0.701149,0.910448,0.792208,0.923701,"[[99.0, 26.0], [6.0, 61.0]]",dyvecctsclpl
5,0.828125,0.697674,0.895522,0.784314,0.921672,"[[99.0, 26.0], [7.0, 60.0]]",vectsbiclpl
6,0.822917,0.689655,0.895522,0.779221,0.917015,"[[98.0, 27.0], [7.0, 60.0]]",ctsclpl
7,0.826875,0.695753,0.895522,0.783093,0.914716,"[[98.76, 26.24], [7.0, 60.0]]",npl
8,0.833333,0.701149,0.910448,0.792208,0.91403,"[[99.0, 26.0], [6.0, 61.0]]",veclpl
9,0.828125,0.697674,0.895522,0.784314,0.913552,"[[99.0, 26.0], [7.0, 60.0]]",dycctsclpl
