In [None]:
import warnings

# Run the active-learning experiment implemented by run_boot_al(). Which
# variants run (without and/or with unsupervised bootstrapping) is decided by
# the module-level `no_boot` / `boot` flags, not by this cell.
# NOTE(review): run_boot_al is defined further down in this file, so its
# defining cell presumably has to be executed before this one - confirm.
with warnings.catch_warnings():
    # Ignore DeprecationWarnings only inside this `with` block; catch_warnings
    # restores the previous warning filters on exit.
    warnings.filterwarnings("ignore",category=DeprecationWarning)
    # NOTE(review): md5 and sha are not used in this cell; presumably they are
    # imported here so that their import-time deprecation warnings are
    # silenced. Both modules exist only in Python 2 - confirm before porting.
    import md5, sha
    run_boot_al()



In [None]:
from noisy_activelearning import *
from datautils import*
from bootstrapping import*
from bootstrapping_unsupervised import*
from datetime import date
from learning import *
from libact_datasetext import *

# --- Experiment parameters (read as module-level globals by run_boot_al and
# --- addtoresultstable) ---
dataPathmain = "../datasets/wdc_product"   # directory holding the feature files
query_strategy = 'default_committee'       # forwarded to the active-learning runners
model_type = 'rf'                          # forwarded to the active-learning runners
dataset = 'headphones_headphones_catalog'  # used to build the feature file names
domain = 'wdc_product'                     # forwarded to BootstrappingUnsupervised
boot_size = 0                              # NOTE(review): not referenced in the visible code
max_queries = 100                          # number of iterations reported per run
al_runs = 5                                # forwarded to the active-learning runners
bootstrap_method = 'attrelbow_density'     # forwarded to BootstrappingUnsupervised
no_boot = False                            # run the variants without bootstrapping
boot = True                                # run the unsupervised-bootstrapping variant

# --- Result tables ---
# Summary table: the method name, the 1st and 50th iteration, then one column
# per completed block of 100 iterations. Floor division (//) keeps the block
# count an integer on Python 3 as well; plain `/` would hand range() a float
# there and raise TypeError.
column_names = ["method", "1st iter.", "50th iter."]
column_names += ["%dth iter." % ((i + 1) * 100) for i in range(max_queries // 100)]

iteration_results = pd.DataFrame(columns=column_names)

# Full table: the method name plus one column per iteration. The labels are
# 0-based here ("0th iter." ... ), unlike the 1-based labels of the summary.
all_column_names = ["method"] + ["%dth iter." % i for i in range(max_queries)]
full_results = pd.DataFrame(columns=all_column_names)

def _build_al_setup(pool, validation, bootstrapping_sample):
    """Assemble the setup dictionary handed to the active-learning runners."""
    return {
        'pool_data': pool['feature_values'],
        'pool_labels': pool['labels'],
        'ids': pool['ids'],
        'validation_data': validation['feature_values'],
        'validation_labels': validation['labels'],
        'bootstrapping_data': bootstrapping_sample['data'],
        'bootstrapping_labels': bootstrapping_sample['labels'],
        'bootstrapping_indices': bootstrapping_sample['indices'],
        'bootstrapping_scores': bootstrapping_sample['scores'],
    }


def _show_heading(title):
    """Render a blue markdown heading above the output of one experiment."""
    display(Markdown("<span style='color:blue;font-size:160%'><b> " + title + "</b></span>"))


def run_boot_al():
    """Run the configured active-learning experiments and display the summary.

    All settings are read from module-level globals (dataPathmain, dataset,
    domain, query_strategy, model_type, max_queries, al_runs,
    bootstrap_method, no_boot, boot). Each experiment's scores are recorded
    through addtoresultstable() into the global ``iteration_results`` table,
    which is displayed at the end. Nothing is returned.
    """
    #get data
    featureFile_pool = dataPathmain+'/features_'+dataset+'_train'
    featureFile_validation = dataPathmain+'/features_'+dataset+'_test'

    trainingData = getLabelledDataFromFile(featureFile_pool, rescale=True)
    validationData = getLabelledDataFromFile(featureFile_validation, rescale=True)

    X = trainingData['feature_values']
    y = trainingData['labels']
    ids = trainingData['ids']

    #NO BOOTSTRAPPING
    if no_boot:
        # A bootstrapper with sample_size=0 is still built so that the setup
        # dictionary has the same bootstrapping_* keys as in the other branch.
        bootstrap = BootstrappingUnsupervised(sample_size=0, data=X, labels=y, ids=ids, bootstrap_method='bowtopbottom', domain=domain)
        al = _build_al_setup(trainingData, validationData, bootstrap.sample)

        # The headings name the configured query strategy instead of a
        # hard-coded 'default_committee', matching the bootstrapping branch.
        _show_heading("Active Learning with "+query_strategy+" Sampling, no Bootstrapping")
        f1_test = active_learning(al, query_strategy, max_queries, al_runs, model_type)
        addtoresultstable("noboot", f1_test, iteration_results)

        _show_heading("Active Learning with "+query_strategy+" Sampling, no Bootstrapping and warm start")
        f1_test = active_learning(al, query_strategy, max_queries, al_runs, model_type, warm_start=True)
        addtoresultstable("noboot_warm_start", f1_test, iteration_results)

    #UNSUPERVISED BOOTSTRAPPING
    if boot:
        bootstrap = BootstrappingUnsupervised(data=X, labels=y, ids=ids, bootstrap_method=bootstrap_method, domain=domain)
        al = _build_al_setup(trainingData, validationData, bootstrap.sample)
        # Only this variant passes the bootstrapper's threshold along.
        al['bootstrapping_threshold'] = bootstrap.threshold

        _show_heading("Active Learning with "+query_strategy+" Sampling and Noisy but Interesting Bootstrapping score_based Reweight Warm Start")

        f1_test_noisy_reweight, pool_correctness = noisy_active_learning(
            al, query_strategy, max_queries, al_runs, model_type,
            setting=None, reweight='score_based', warm_start=True)

        addtoresultstable("noisy_warm_"+query_strategy, f1_test_noisy_reweight, iteration_results)
        addtoresultstable("unsupervised_correctness", pool_correctness, iteration_results)

    display(iteration_results)
    
def addtoresultstable(method_name, results, resultstable, full_table=None, n_queries=None):
    """Append the mean and standard deviation of ``results`` to the result tables.

    Two rows are appended to each table: one labelled ``method_name`` holding
    the means and one labelled "st. deviaton" holding the standard
    deviations, every value formatted with three decimals.

    Parameters:
        method_name: label written into the first column of the mean row.
        results: array-like reduced with np.mean / np.std along axis 0
            (presumably one row per run and one column per iteration -
            confirm against the callers). It needs at least 50 columns and at
            least ``n_queries`` columns, because positions 0, 49 and every
            100th position are read.
        resultstable: summary DataFrame; receives positions 0 and 49 plus
            every 100th position, so its columns must match that layout.
        full_table: DataFrame receiving all ``n_queries`` positions.
            Defaults to the module-level ``full_results``.
        n_queries: number of iterations to report. Defaults to the
            module-level ``max_queries``.

    Returns:
        None; both tables are modified in place.
    """
    if full_table is None:
        full_table = full_results
    if n_queries is None:
        n_queries = max_queries

    mean_f1 = np.mean(results, axis=0)
    std_f1 = np.std(results, axis=0)

    def as_rows(positions):
        # Build the (mean row, std row) pair for the given positions.
        means = [method_name] + ["%.3f" % mean_f1[p] for p in positions]
        stds = ["st. deviaton"] + ["%.3f" % std_f1[p] for p in positions]
        return means, stds

    # Summary: 1st and 50th iteration, then the last iteration of every full
    # block of 100. Floor division keeps the block count an integer on
    # Python 3 too; plain `/` would give range() a float and raise TypeError.
    summary_positions = [0, 49] + [(i + 1) * 100 - 1 for i in range(n_queries // 100)]
    f1_line, std_line = as_rows(summary_positions)
    index = resultstable.shape[0]
    resultstable.loc[index] = f1_line
    resultstable.loc[index + 1] = std_line

    # Full table: every iteration.
    f1_line, std_line = as_rows(range(n_queries))
    index_full = full_table.shape[0]
    full_table.loc[index_full] = f1_line
    full_table.loc[index_full + 1] = std_line
    
    
