In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
from plotnine import *
import lightgbm as lgbm

In [34]:
from typing import List, Dict, Union, Callable

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_multilabel_classification

In [188]:
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report, multilabel_confusion_matrix

In [3]:
from libs.evaluation import Metrics

In [4]:
mlabel = make_multilabel_classification(n_samples = 10000,n_features = 20,n_classes = 3,allow_unlabeled=True)

In [105]:
mlabel[0]

array([[4., 4., 5., ..., 2., 2., 6.],
       [4., 4., 5., ..., 4., 1., 0.],
       [4., 2., 3., ..., 6., 1., 0.],
       ...,
       [4., 2., 8., ..., 5., 6., 2.],
       [4., 4., 1., ..., 2., 3., 1.],
       [4., 2., 7., ..., 3., 1., 3.]])

In [5]:
X = pd.DataFrame(mlabel[0])

In [6]:
Y = pd.DataFrame(mlabel[1])

In [7]:
Y.columns = ['a','b','c']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X,Y,test_size = 0.3)

In [9]:
model = lgbm.LGBMClassifier(boosting_type = 'dart', max_depth = 4, learning_rate = 0.01,objective='binary')

In [10]:
labels = list(Y)

In [11]:
models = {}
for label in labels:
    model.fit(X = X_train, y = y_train[label])
    models[f'{label}'] = model

In [12]:
models

{'a': LGBMClassifier(boosting_type='dart', learning_rate=0.01, max_depth=4,
                objective='binary'),
 'b': LGBMClassifier(boosting_type='dart', learning_rate=0.01, max_depth=4,
                objective='binary'),
 'c': LGBMClassifier(boosting_type='dart', learning_rate=0.01, max_depth=4,
                objective='binary')}

In [19]:
y_hats = pd.DataFrame()
for label in labels:
    model = models[f'{label}']
    y_hats[f'{label}'] = model.predict_proba(X_test)[:,1]

In [24]:
metrics = {}
for label in labels:
    y_test_ = y_test[label]
    metrics[f'{label}'] = Metrics(y_real = y_test_, model_probs = y_hats[label])

In [28]:
for label in labels:
    print(metrics[label].scores)

   accuracy  precision  recall    f1  auc  gini
0      0.59       0.37    0.44  0.41  0.6   0.2
   accuracy  precision  recall    f1   auc  gini
0      0.46       0.87     0.4  0.55  0.58  0.16
   accuracy  precision  recall    f1   auc  gini
0      0.76       0.78    0.65  0.71  0.85   0.7


In [30]:
f1s = []
for label in labels:
    f1s.append(metrics[label].scores['f1'].values)

In [32]:
macro_average_f1 = np.mean(f1s)

In [33]:
macro_average_f1

0.5566666666666666

In [50]:
a = pd.DataFrame(columns = labels)

In [167]:
def averages(y_tests, y_hats, labels:List[str], threshold:float = 0.5):
    
    scores = []
    micro_scores = []
    sample_scores = []
    f1_scores = pd.DataFrame()
    for label in labels:
        y_test = y_tests[label]
        y_hat = y_hats[label]
        y_pred = np.where(y_hat > threshold, 1, 0)
        score = f1_score(y_true = y_test, y_pred = y_pred)
        scores.append(score)
        
        micro_score = f1_score(y_true = y_test, y_pred = y_pred, average = 'macro')
        micro_scores.append(micro_score)
        
#         sample_score = f1_score(y_true = y_test, y_pred = y_pred, average = 'sample')
#         sample_scores.append(sample_score)
#         sample_scores.append('sample')
        
    scores = np.array([scores])
    print(scores)
    micro_scores = np.array([micro_scores])
    print(micro_scores)
#     sample_scores = np.array([sample_scores])
    
    f1_scores = pd.DataFrame(scores)
    print(f1_scores)
    micro_f1_df = pd.DataFrame(micro_scores)
    print(micro_f1_df)
    f1_scores = pd.concat([f1_scores,micro_f1_df], axis = 0)
    
    return f1_scores


In [168]:
f1_scores = averages(y_tests = y_test, y_hats = y_hats, labels = labels)

[[0.4059501  0.54960981 0.70673077]]
[[0.54490559 0.4398132  0.74891333]]
         0        1         2
0  0.40595  0.54961  0.706731
          0         1         2
0  0.544906  0.439813  0.748913


In [169]:
f1_scores

Unnamed: 0,0,1,2
0,0.40595,0.54961,0.706731
0,0.544906,0.439813,0.748913


In [172]:
y_test.values

array([[1, 1, 1],
       [0, 0, 0],
       [0, 1, 1],
       ...,
       [0, 1, 0],
       [1, 1, 1],
       [0, 1, 1]])

In [175]:
y_preds = np.where(y_hats > 0.5, 1, 0)

In [177]:
f1s = f1_score(y_test, y_preds, average = 'samples')



In [178]:
f1s

0.28826666666666667

In [196]:
f1_score_raw = f1_score(y_test, y_preds, average = 'macro')

In [182]:
report = classification_report(y_test, y_preds)



In [184]:
print(report)

              precision    recall  f1-score   support

           0       0.37      0.44      0.41       955
           1       0.87      0.40      0.55      2459
           2       0.78      0.65      0.71      1367

   micro avg       0.68      0.48      0.56      4781
   macro avg       0.68      0.50      0.55      4781
weighted avg       0.75      0.48      0.57      4781
 samples avg       0.25      0.35      0.29      4781



In [193]:
type(report)

str

In [191]:
multilabel_confusion_matrix(y_test,y_preds)

array([[[1339,  706],
        [ 532,  423]],

       [[ 398,  143],
        [1473,  986]],

       [[1386,  247],
        [ 485,  882]]])

In [198]:
f1s = [0.41,0.55,0.71]

In [200]:
macro_f1 = np.mean(f1s)
print(macro_f1)

0.5566666666666666
