In [1]:
import numpy as np
import matplotlib.pyplot as plt
from utils import load_json

In [2]:
# NRMSE results, keyed first by dataset name and then by method
# ('stat', 'mf', 'dsan'). Every method holds a list of four scores;
# perform_lift() averages each list with np.mean before comparing methods.
# NOTE(review): the method keys presumably stand for a statistical baseline,
# matrix factorization and the DSAN model — TODO confirm.
# NOTE(review): the four list positions presumably correspond to four
# experimental settings (e.g. different missing rates) — TODO confirm and
# record where these numbers were copied from.
nrmse = {
    'adult': {
        'stat': [1.0017, 0.9972, 1.0002, 1.0125],
        'mf': [0.8358, 0.8561, 0.8745, 0.9071],
        'dsan': [0.7999, 0.8099, 0.8247, 0.8488]
    },
    'bank': {
        'stat': [0.9562, 0.9651, 1.0, 0.9778],
        'mf': [0.7703, 0.8164, 0.8586, 0.8783],
        'dsan': [0.7580, 0.7860, 0.8282, 0.8226]
    },
    'online': {
        'stat': [1.0012, 0.9836, 0.9875, 0.9947],
        'mf': [0.6038, 0.6504, 0.6915, 0.7086],
        'dsan': [0.6311, 0.6413, 0.6619, 0.6738]
    },
    'churn': {
        'stat': [0.9949, 1.0052, 0.9966, 1.0014],
        'mf': [0.9850, 0.9964, 0.9986, 1.0208],
        'dsan': [0.9625, 0.9725, 0.9685, 0.9757]
    }    
}

# Error-rate results, using the same layout as the other result tables in
# this cell: dataset name -> method ('stat', 'mf', 'dsan') -> list of four
# scores. perform_lift() averages each list with np.mean before comparing.
# NOTE(review): the four list positions presumably correspond to four
# experimental settings (e.g. different missing rates) — TODO confirm and
# record where these numbers were copied from.
error = {
    'adult': {
        'stat': [0.4112, 0.4129, 0.4134, 0.4126],
        'mf': [0.2147, 0.2238, 0.2342, 0.2425],
        'dsan': [0.2103, 0.2173, 0.2239, 0.2325]
    },
    'bank': {
        'stat': [0.4063, 0.4088, 0.4093, 0.4074],
        'mf': [0.246, 0.2558, 0.2674, 0.273],
        'dsan': [0.2501, 0.2571, 0.2678, 0.2732]
    },
    'online': {
        'stat': [0.4166, 0.4198, 0.417, 0.4186],
        'mf': [0.3387, 0.3473, 0.3539, 0.3604],
        'dsan': [0.3366, 0.3465, 0.3498, 0.3569]
    },
    'churn': {
        'stat': [0.4055, 0.406, 0.4008, 0.4047],
        'mf': [0.3561, 0.3608, 0.3652, 0.377],
        'dsan': [0.3436, 0.3576, 0.3549, 0.3665]
    }
}

# AUC results: dataset name -> scores.
#   'comp'                -> a single scalar reference score; perform_lift2()
#                            prints the comparison against it under the label
#                            'compare to complete'
#   'stat', 'mf', 'dsan'  -> four scores per method, averaged by perform_lift2()
# All literals are written with a leading zero and four decimals so the
# columns line up; the numeric values themselves are unchanged.
# NOTE(review): 'comp' is presumably the AUC obtained on complete
# (non-missing) data — TODO confirm.
auc = {
    'adult': {
        'comp': 0.9052,
        'stat': [0.9034, 0.9015, 0.8978, 0.8962],
        'mf':   [0.9049, 0.9049, 0.9041, 0.9036],
        'dsan': [0.9050, 0.9050, 0.9047, 0.9045],
    },
    'bank': {
        'comp': 0.9030,
        'stat': [0.9020, 0.9002, 0.8989, 0.8962],
        'mf':   [0.9030, 0.9028, 0.9028, 0.9019],
        'dsan': [0.9028, 0.9029, 0.9029, 0.9020],
    },
    'online': {
        'comp': 0.8945,
        'stat': [0.8889, 0.8870, 0.8796, 0.8765],
        'mf':   [0.8952, 0.8983, 0.8980, 0.8993],
        'dsan': [0.8951, 0.8970, 0.8964, 0.8984],
    },
    'churn': {
        'comp': 0.8325,
        'stat': [0.8324, 0.8327, 0.8308, 0.8317],
        'mf':   [0.8322, 0.8313, 0.8298, 0.8295],
        'dsan': [0.8323, 0.8320, 0.8316, 0.8307],
    },
}

In [3]:
def perform_lift(metric, dataset, comp=0.):
    """Print DSAN's relative lift over the 'stat' and 'mf' baselines on one dataset.

    The scores of each method in ``metric[dataset]`` are averaged with
    ``np.mean``. The lift over a baseline is then
    ``(baseline_avg - dsan_avg) / dsan_avg * 100``: a percentage relative to
    the DSAN average, positive when DSAN's average is the lower one.

    Args:
        metric: Mapping ``dataset -> {'stat': [...], 'mf': [...], 'dsan': [...]}``.
        dataset: Key of the dataset to report on; also echoed in the header line.
        comp: Optional extra reference score. When truthy, its lift over DSAN
            is printed first (one decimal, without a label). The default
            ``0.`` skips that line.

    Returns:
        None. The report is written to stdout.

    Raises:
        KeyError: If ``dataset`` or one of the method keys is missing.
    """
    scores = metric[dataset]
    # The DSAN average is the denominator of every lift below, so compute it once.
    dsans = np.mean(scores['dsan'])

    if comp:
        imp = (comp - dsans) / dsans * 100
        print(round(imp, 1), '%')

    stats = np.mean(scores['stat'])
    mfs = np.mean(scores['mf'])

    # The header names the dataset actually being reported.
    print(f'in {dataset}')
    # NOTE(review): the lift is expressed relative to the DSAN average, whereas
    # perform_lift2 divides by the baseline average — confirm this asymmetry
    # is intended before quoting the percentages side by side.
    print('comparative to stat')
    imp = (stats - dsans) / dsans * 100
    print(round(imp, 2), '%')
    print('comparative to mf')
    imp = (mfs - dsans) / dsans * 100
    print(round(imp, 2), '%')
    print(f'stat avg: {round(stats, 4)}')
    print(f'mf avg: {round(mfs, 4)}')
    print(f'dsan avg: {round(dsans, 4)}')

In [4]:
def perform_lift2(metric, dataset):
    """Print DSAN's relative lift over the 'comp' score and both baselines.

    The scores of each method in ``metric[dataset]`` are averaged with
    ``np.mean``. Each lift is ``(dsan_avg - reference) / reference * 100``,
    where the reference is the scalar ``'comp'`` score, the 'stat' average or
    the 'mf' average. The value is positive when DSAN's average is the higher one.

    Args:
        metric: Mapping ``dataset -> {'comp': float, 'stat': [...],
            'mf': [...], 'dsan': [...]}``.
        dataset: Key of the dataset to report on; also echoed in the header line.

    Returns:
        None. The report is written to stdout.

    Raises:
        KeyError: If ``dataset`` or one of the expected keys is missing.
    """
    scores = metric[dataset]
    comp = scores['comp']
    # The DSAN average is reused by all three comparisons, so compute it once.
    dsans = np.mean(scores['dsan'])

    # The header names the dataset actually being reported.
    print(f'in {dataset}')
    print('compare to complete')
    imp = (dsans - comp) / comp * 100
    # One decimal here versus two below mirrors the original report format.
    print(round(imp, 1), '%')

    stats = np.mean(scores['stat'])
    mfs = np.mean(scores['mf'])
    print('comparative to stat')
    imp = (dsans - stats) / stats * 100
    print(round(imp, 2), '%')
    print('comparative to mf')
    imp = (dsans - mfs) / mfs * 100
    print(round(imp, 2), '%')
    print(f'comp: {round(comp, 4)}')
    print(f'stat avg: {round(stats, 4)}')
    print(f'mf avg: {round(mfs, 4)}')
    print(f'dsan avg: {round(dsans, 4)}')

### NRMSE Lift

In [5]:
# NRMSE on 'adult': DSAN's average compared with the 'stat' and 'mf' averages.
perform_lift(nrmse, 'adult')

in adult adult
comparative to stat
22.18 %
comparative to mf
5.79 %
stat avg: 1.0029
mf avg: 0.8684
dsan avg: 0.8208


In [6]:
# NRMSE on 'bank': DSAN's average compared with the 'stat' and 'mf' averages.
perform_lift(nrmse, 'bank')

in adult bank
comparative to stat
22.05 %
comparative to mf
4.03 %
stat avg: 0.9748
mf avg: 0.8309
dsan avg: 0.7987


In [7]:
# NRMSE on 'online': DSAN's average compared with the 'stat' and 'mf' averages.
perform_lift(nrmse, 'online')

in adult online
comparative to stat
52.1 %
comparative to mf
1.77 %
stat avg: 0.9918
mf avg: 0.6636
dsan avg: 0.652


In [8]:
# NRMSE on 'churn': DSAN's average compared with the 'stat' and 'mf' averages.
perform_lift(nrmse, 'churn')

in adult churn
comparative to stat
3.07 %
comparative to mf
3.13 %
stat avg: 0.9995
mf avg: 1.0002
dsan avg: 0.9698


### Error Rate Lift

In [9]:
# Error rate on 'adult': DSAN's average compared with the 'stat' and 'mf' averages.
perform_lift(error, 'adult')

in adult adult
comparative to stat
86.66 %
comparative to mf
3.53 %
stat avg: 0.4125
mf avg: 0.2288
dsan avg: 0.221


In [10]:
# Error rate on 'bank': DSAN's average compared with the 'stat' and 'mf' averages.
perform_lift(error, 'bank')

in adult bank
comparative to stat
55.68 %
comparative to mf
-0.57 %
stat avg: 0.408
mf avg: 0.2606
dsan avg: 0.262


In [11]:
# Error rate on 'online': DSAN's average compared with the 'stat' and 'mf' averages.
perform_lift(error, 'online')

in adult online
comparative to stat
20.31 %
comparative to mf
0.76 %
stat avg: 0.418
mf avg: 0.3501
dsan avg: 0.3475


In [12]:
# Error rate on 'churn': DSAN's average compared with the 'stat' and 'mf' averages.
perform_lift(error, 'churn')

in adult churn
comparative to stat
13.67 %
comparative to mf
2.57 %
stat avg: 0.4043
mf avg: 0.3648
dsan avg: 0.3556


### AUC Lift

In [13]:
# AUC on 'adult': DSAN's average compared with the 'comp' score and the 'stat' / 'mf' averages.
perform_lift2(auc, 'adult')

in adult adult
compare to complete
-0.0 %
comparative to stat
0.56 %
comparative to mf
0.05 %
comp: 0.9052
stat avg: 0.8997
mf avg: 0.9044
dsan avg: 0.9048


In [14]:
# AUC on 'bank': DSAN's average compared with the 'comp' score and the 'stat' / 'mf' averages.
perform_lift2(auc, 'bank')

in adult bank
compare to complete
-0.0 %
comparative to stat
0.37 %
comparative to mf
0.0 %
comp: 0.903
stat avg: 0.8993
mf avg: 0.9026
dsan avg: 0.9026


In [15]:
# AUC on 'online': DSAN's average compared with the 'comp' score and the 'stat' / 'mf' averages.
perform_lift2(auc, 'online')

in adult online
compare to complete
0.2 %
comparative to stat
1.55 %
comparative to mf
-0.11 %
comp: 0.8945
stat avg: 0.883
mf avg: 0.8977
dsan avg: 0.8967


In [16]:
# AUC on 'churn': DSAN's average compared with the 'comp' score and the 'stat' / 'mf' averages.
perform_lift2(auc, 'churn')

in adult churn
compare to complete
-0.1 %
comparative to stat
-0.03 %
comparative to mf
0.11 %
comp: 0.8325
stat avg: 0.8319
mf avg: 0.8307
dsan avg: 0.8316
