In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import copy
import numpy as np
import pandas as pd
import pickle
import sys
import torch
import warnings

# Silence every Python warning for the rest of the session.
# NOTE(review): this is global, so it also hides any deprecation or convergence
# warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

# Put the directory two levels above the working directory first on the module
# search path; presumably that is the repository root containing the `methods`
# and `pyfunctions` packages imported below — verify.
sys.path.insert(0,'../..')

from methods.bag_of_ngrams.processing import (cleanReport, cleanReports, cleanSplit, getCounter, 
                                              getTrainedVectorizer, STRIPCHARS, unkReports)
from methods.sklearn_calibration import *
from sklearn.linear_model import LogisticRegression
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import f1_score, recall_score
from sklearn.model_selection import RandomizedSearchCV
from pyfunctions.general import *

# 1. Set up

In [5]:
# Experiment configuration
args = {
    'domain': 'prostate',
    'epochs': 20,
    'embeddingDim': 300,
    'maxDocLength': 1346,
    'target_fields': [
        'TreatmentEffect', 'TumorType', 'PrimaryGleason', 'SecondaryGleason', 'TertiaryGleason',
        'SeminalVesicleNone', 'LymphNodesNone', 'MarginStatusNone', 'ExtraprostaticExtension',
        'PerineuralInfiltration', 'RbCribriform', 'BenignMargins',
    ],
    'n_tries': 20,  # Number of random search candidates
}

# Read the JSON file for the configured domain and run the report cleaning step
path = "../../data/" + args['domain'] + ".json"
data = cleanSplit(readJson(path), STRIPCHARS)

# Unk rare words: the counter is built from the training split only and then
# applied to each split in turn (train, val, test)
counter = getCounter(data['train'])
for split in ('train', 'val', 'test'):
    data[split] = unkReports(data[split], counter)

# 2. Vectorize text and train model

In [6]:
# Random search space: 1000 log-spaced values of C between 1e-6 and 1e6
params = { 'C': np.logspace(-6,6,1000)}

# Target field to model (index 3 of args['target_fields'])
field = args['target_fields'][3]
# Forwarded to getTrainedVectorizer; presumably the maximum n-gram order — verify
N = 3
# Fixed seed so the sampled C candidates (and every metric derived from the
# fitted model) are the same on each Restart & Run All
seed = 0

# Extract labels and reports
corpus_train = extractListFromDic(data['train'], 'clean_document_unked')
y_train = extractListFromDic(data['train'], 'labels', field)

corpus_val = extractListFromDic(data['val'], 'clean_document_unked')
y_val = extractListFromDic(data['val'], 'labels', field)

corpus_test = extractListFromDic(data['test'], 'clean_document_unked')
y_test = extractListFromDic(data['test'], 'labels', field)

# Vectorize documents; the vectorizer only ever sees the training corpus
# NOTE(review): the meaning of the trailing 1 is defined by getTrainedVectorizer — confirm
vectorizer = getTrainedVectorizer(corpus_train, N, 1)
X_train = vectorizer.transform(corpus_train)
X_val = vectorizer.transform(corpus_val)
X_test = vectorizer.transform(corpus_test)

# L1-regularised logistic regression; C is chosen by a 3-fold randomized search.
# The number of candidates comes from args['n_tries'] rather than a second
# hard-coded copy of the same value.
model = LogisticRegression(penalty = 'l1', class_weight = 'balanced', solver = 'liblinear',
                           random_state = seed)
clf = RandomizedSearchCV(model, params, cv=3, n_iter = args['n_tries'], n_jobs = 20,
                         random_state = seed)
clf.fit(X_train, y_train)

RandomizedSearchCV(cv=3,
                   estimator=LogisticRegression(class_weight='balanced',
                                                penalty='l1',
                                                solver='liblinear'),
                   n_iter=20, n_jobs=20,
                   param_distributions={'C': array([1.00000000e-06, 1.02804473e-06, 1.05687597e-06, 1.08651577e-06,
       1.11698682e-06, 1.14831241e-06, 1.18051653e-06, 1.21362380e-06,
       1.24765955e-06, 1.28264983e-06, 1.31862140e-06, 1.35560179e-06,
       1.393619...
       5.29326606e+05, 5.44171429e+05, 5.59432571e+05, 5.75121707e+05,
       5.91250841e+05, 6.07832313e+05, 6.24878807e+05, 6.42403366e+05,
       6.60419396e+05, 6.78940681e+05, 6.97981391e+05, 7.17556092e+05,
       7.37679760e+05, 7.58367791e+05, 7.79636013e+05, 8.01500696e+05,
       8.23978568e+05, 8.47086827e+05, 8.70843150e+05, 8.95265713e+05,
       9.20373200e+05, 9.46184819e+05, 9.72720319e+05, 1.00000000e+06])})

# 3. Get output

In [7]:
def _prediction_frame(estimator, X, y):
    """Build a per-document frame of labels, predictions and top-class probability.

    Parameters
    ----------
    estimator : fitted classifier exposing ``predict`` and ``predict_proba``.
    X : feature matrix accepted by ``estimator``.
    y : sequence of true labels, one per row of ``X``.

    Returns
    -------
    pandas.DataFrame with columns ``label`` (true label as str), ``prediction``
    (predicted class as str) and ``probability`` (largest predicted class
    probability for the row).
    """
    return pd.DataFrame({'label': np.array(y).astype(str),
                         # Cast to str like the labels so the two columns can be
                         # compared with == even when the raw labels are not strings.
                         'prediction': np.asarray(estimator.predict(X)).astype(str),
                         'probability': np.max(estimator.predict_proba(X), axis=1)})


pred_val = _prediction_frame(clf, X_val, y_val)

pred_test = _prediction_frame(clf, X_test, y_test)

# 4. Calibrate predictions

In [9]:
classes = clf.classes_

# Correctness of the uncalibrated predictions. Labels are stored as str, so the
# predictions are compared as str as well.
pred_test['correct'] = pred_test['label'] == pred_test['prediction'].astype(str)
pred_val['correct'] = pred_val['label'] == pred_val['prediction'].astype(str)

probs_val = clf.predict_proba(X_val)
probs_test = clf.predict_proba(X_test)

# Multiclass calibration: one isotonic regressor per class, fitted one-vs-rest
# on the validation split and applied to the test split.
calibrated_matrix = np.zeros((pred_test.shape[0], len(classes)))
for p in range(len(classes)):
    # Binary target for class p. Kept in a local so that pred_val['correct']
    # (prediction correctness, set above) is not overwritten by the loop.
    is_class = (pred_val['label'] == str(classes[p])).astype(int)

    reg = IsotonicRegression()
    reg.fit(probs_val[:, p], is_class)

    # Clamp test scores into the range seen during fitting; outside it the
    # regressor returns NaN with its default out_of_bounds setting.
    # np.clip returns a new array, so probs_test itself is left unmodified.
    clipped = np.clip(probs_test[:, p], reg.X_min_, reg.X_max_)

    calibrated_matrix[:, p] = reg.predict(clipped)

# Normalize calibrated scores so each row sums to 1. A row where every
# calibrator returned 0 would be 0/0 = NaN; such rows fall back to the uniform
# distribution over classes instead.
row_sums = calibrated_matrix.sum(axis=1)
uniform = np.full_like(calibrated_matrix, 1.0 / len(classes))
calibrated_matrix = np.divide(calibrated_matrix, row_sums[:, np.newaxis],
                              out=uniform, where=(row_sums != 0)[:, np.newaxis])

# Confidence per document = largest normalized calibrated score
calibrated_scores = np.max(calibrated_matrix, axis = 1)

# Calculate expected calibration error
# NOTE(review): the confidences are the max calibrated score, while the
# predictions scored here are still the ones made from the uncalibrated
# probabilities; after calibration the top class can differ — confirm this
# pairing is intended.
ece = ece_mce_error(calibrated_scores, pred_test['prediction'].astype(str), 
                            pred_test['label'].astype(str), num_bins = 10, plot = None)

pred_test['calibrated_score'] = calibrated_scores

[(0.0, 0.1), (0.1, 0.2), (0.2, 0.30000000000000004), (0.30000000000000004, 0.4), (0.4, 0.5), (0.5, 0.6000000000000001), (0.6000000000000001, 0.7000000000000001), (0.7000000000000001, 0.8), (0.8, 0.9), (0.9, 1.0)]
{0.9923587371807763, 0.9832002203249794, 0.9920593689471917, 0.9918534696447951, 0.989118076985545, 0.4691943127962085, 0.9782450046060024, 0.9805750552937783, 0.7154574132492113, 0.9828666457169396, 0.9914529914529915, 0.45013715467749915, 0.7776581554024685, 1.0, 0.886001074487398, 0.5238095238095238, 0.9759462604285754, 0.5743740795287187, 0.8571607987608323, 0.5215200380407307, 0.5982905982905984, 0.9710346814338391, 0.9839412997182895, 0.8518822275075864, 0.6976744186046512, 0.7906295754026355, 0.678652108433735, 0.9790874524714829, 0.9889006801962351, 0.8865435356200528, 0.8824836048233552, 0.9955587132043486, 0.9934250579751882, 0.9964302533761475, 0.9823637512134542, 0.5988593155893536, 0.9940378300290317, 0.9975768335692522, 0.43893711339692926, 0.8698187877525515, 0.

In [10]:
# Report the first element of the ece_mce_error result
# (presumably the ECE, with the MCE in a later position — verify against the helper).
print('expected calibration error:', ece[0])

expected calibration error: 0.015619947722185178
