In [1]:
import pandas as pd
import numpy as np
import re
import itertools
import collections
import pickle
from statistics import mean, stdev
from scipy import stats, spatial

In [2]:
# Information for NYUv2
data = 'NYUv2'
iters = 40000 # 20000 for large, 40000 for small
tasks = ('segment_semantic','normal','depth_zbuffer') # task 0, 1, 2
metrics = {'segment_semantic': ['mIoU', 'Pixel Acc'],
           'normal': ['Angle Mean', 'Angle Median', 'Angle 11.25', 'Angle 22.5', 'Angle 30'],
           'depth_zbuffer': ['abs_err','rel_err','sigma_1.25','sigma_1.25^2','sigma_1.25^3']}
metrics_prop = {'mIoU': False, 'Pixel Acc': False, 
                'Angle Mean': True, 'Angle Median': True, 'Angle 11.25': False, 'Angle 22.5': False, 'Angle 30': False,
                'abs_err': True,'rel_err': True,'sigma_1.25': False,'sigma_1.25^2': False,'sigma_1.25^3': False} # True: the lower the better
reduce_var_length = {'train_loss': 10, 'val_loss': 3, 'val_acc': 3}

In [2]:
# Information for Taskonomy
data = 'Taskonomy'
iters = 50000
tasks = ('segment_semantic','normal','depth_zbuffer','keypoints2d','edge_texture') # task 0, 1, 2, 3, 4
metrics = {'segment_semantic': ['err'],'normal': ['cosine_similarity'], 'depth_zbuffer': ['abs_err'],
           'keypoints2d': ['key_err'],'edge_texture': ['edge_err']}
metrics_prop = {'err': True, 'abs_err': True, 'cosine_similarity': False, 'key_err': True, 'edge_err': True} # True: the lower the better
reduce_var_length = {'train_loss': 10, 'val_loss': 3, 'val_acc': 3}

In [47]:
date = '0123'
model = 'resnet'
B = 5 # 17 - fined, 5 - coarse
coarse = True

In [3]:
date = '0227'
model = 'mobilenet'
B = 5 # 9 - coarse v1/v4 with init weights, 6 - coarse v2, 5 - coarse v3/v5 with init weights, 8 - short network
coarse = True

In [4]:
def confirm_complete(lines, iters):
    rev_lines = lines[::-1]
    for line in rev_lines:
        if 'Iter' in line:
            it = int(line.split(' ')[1])
            break
    if it != iters:
        return False
    else:
        return True

def extract_loss_results(lines, tasks, reduce_var_length):
    # Function: Extract train loss / val loss 
    train_loss = {}
    val_loss = {}
    for task in tasks:
        train_loss[task] = collections.deque(reduce_var_length['train_loss']*[0], reduce_var_length['train_loss'])
        val_loss[task] = collections.deque(reduce_var_length['val_loss']*[0], reduce_var_length['val_loss'])
    
    for line in lines:
        for task in tasks:
            if task[:4] in line and 'Train Loss' in line:
                loss = float(line.split(': ')[1])
                train_loss[task].append(loss)
            elif task[:4] in line and 'Val Loss' in line:
                loss = float(line.split(': ')[1])
                val_loss[task].append(loss)
                
    avg_loss = {'train_loss': [], 'val_loss': []}
    std_loss = {'train_loss': [], 'val_loss': []}
    for task in tasks:
        avg_loss['train_loss'].append(mean(train_loss[task]))
        avg_loss['val_loss'].append(mean(val_loss[task]))
        
        std_loss['train_loss'].append(stdev(train_loss[task]))
        std_loss['val_loss'].append(stdev(val_loss[task]))
    return avg_loss, std_loss

def extract_metric_results(lines, tasks, metrics, reduce_var_length):
    # Function: Extract val metrics
    metric_queue = {}
    for task in tasks:
        for metric in metrics[task]:
            metric_queue[metric] = collections.deque(reduce_var_length['val_acc']*[0], reduce_var_length['val_acc'])
    
    for line in lines:
        for task in tasks:
            for metric in metrics[task]:
                if "'"+metric in line:
                    value = float(re.findall("\d+\.\d+", line.split(metric)[1])[0])
                    metric_queue[metric].append(value)
                
    avg_metric = {}
    for task in tasks:
        for metric in metrics[task]:
            avg_metric[metric] = (mean(metric_queue[metric]))
    return avg_metric

def rel_perf(results, lower=True):
    # Function: Compute rel. perf.
    if lower:
        return (results[0,:] - results)/results[0,:]*100
    else:
        return (results - results[0,:])/results[0,:]*100

# For 2-Task Loss

In [5]:
loss_df = {'train_loss': pd.DataFrame(), 'val_loss': pd.DataFrame()}

# For all two tasks
for two_task in set(itertools.combinations(tasks, 2)):
    print(two_task)
    
    # For all branching points
    loss_results = {'train_loss': [], 'val_loss': []}
    loss_std  = {'train_loss': [], 'val_loss': []}
    block = B
    for i in range(block+1):
        log = '_'.join(two_task) + '_b' + str(i) + '.stdout'
        if coarse:
            log = '2task_coarse_'+data+'_'+model+'/'+log
        else:
            log = '2task_fined_'+data+'_'+model+'/'+log
        # Read in content
        with open('./log/'+log) as f:
            lines = f.readlines()
            lines = [line.rstrip() for line in lines]
            if not confirm_complete(lines,iters):
                print(str(i) + ' not complete')
#                 continue
            avg_loss, std_loss = extract_loss_results(lines, two_task, reduce_var_length)
            for mode in loss_results:
                loss_results[mode].append(avg_loss[mode])
                loss_std[mode].append(std_loss[mode])
    
    # Compute loss std 
    # 1. avg of std of loss in one 2-task model
    # 2. std of loss in multiple 2-task models with different b:
    for mode in loss_results:
        print(mode)
        single_temp = np.array(loss_std[mode])
        multiple_temp = np.array(loss_results[mode])
        for i in range(single_temp.shape[1]):
            print('task '+str(i))
            print('single '+str(mean(single_temp[:,i])))
            print('multiple '+str(stdev(multiple_temp[:,i])))
        print('='*80)
           
    # Add results to dataframe
    task0_idx = tasks.index(two_task[0])
    task1_idx = tasks.index(two_task[1])
    col_name = '(' + str(task0_idx) + ', '+ str(task1_idx) + ')' + '-'
    for mode in loss_df:
        rel_mode_loss = rel_perf(np.array(loss_results[mode]))
        for idx in range(2):
            loss_df[mode][col_name+str(idx)] = rel_mode_loss[:,idx]

('segment_semantic', 'depth_zbuffer')
train_loss
task 0
single 0.010959280780406264
multiple 0.02730537560017566
task 1
single 0.006321642408127466
multiple 0.08174401231894603
val_loss
task 0
single 0.0040983859410404075
multiple 0.01488139280618091
task 1
single 0.0026325188875806795
multiple 0.041571281041211695
('normal', 'depth_zbuffer')
train_loss
task 0
single 0.000583504491542647
multiple 0.004079443180958237
task 1
single 0.008756523384219745
multiple 0.023980789603347093
val_loss
task 0
single 8.000753082406368e-05
multiple 0.0007415948779238955
task 1
single 0.01110427849584414
multiple 0.01324858063676594
('segment_semantic', 'normal')
train_loss
task 0
single 0.006929730045089003
multiple 0.03815670792927501
task 1
single 0.0006004096531187283
multiple 0.0014388803517550247
val_loss
task 0
single 0.002508927063576662
multiple 0.010927120524775213
task 1
single 0.0001980177169747661
multiple 0.0017254521683272394


In [6]:
# Save to Excel
for mode in loss_df:
    loss_df[mode].to_excel("./2task/"+ data + "_2task_metrics_" + model + "_" + date + "_" + mode + ".xlsx", index_label='branch')  

In [31]:
loss_df

{'train_loss':     (0, 1)-0   (0, 1)-1  (1, 4)-0   (1, 4)-1  (0, 2)-0   (0, 2)-1  (0, 3)-0  \
 0   0.000000   0.000000  0.000000   0.000000  0.000000   0.000000  0.000000   
 1  -1.788667  -8.997792 -0.603041  -0.052253 -3.866885  -5.253456 -6.617167   
 2   3.988598 -23.258278 -3.347317  -5.029393 -6.616476 -13.133641 -2.691286   
 3  -0.054400 -36.821192 -3.976578  -9.235794  0.619749 -19.861751 -2.909028   
 4  10.044390 -63.205298 -5.235099 -11.626388 -0.539007 -43.225806 -3.294430   
 5   0.480895 -69.818985 -5.864359 -12.175049 -3.775232 -48.156682 -0.792579   
 
     (0, 3)-1    (2, 4)-0    (2, 4)-1  (1, 3)-0   (1, 3)-1  (3, 4)-0  (3, 4)-1  \
 0   0.000000    0.000000    0.000000  0.000000   0.000000  0.000000  0.000000   
 1  -8.954496 -102.279982 -237.990519 -1.471228  -0.011996 -0.145684 -1.198314   
 2 -31.999512  -14.181487   -3.239400 -6.990511  -4.162668 -1.129052 -3.937319   
 3 -40.600220  -19.425445   -1.922570 -7.982937  -5.422265 -2.209542 -4.490387   
 4 -66.890326 

# For 2-Task Metrics

In [7]:
if data == 'NYUv2':
    baselines_2task = {'mIoU': [], 'Pixel Acc': [], 
                'Angle Mean': [], 'Angle Median': [], 'Angle 11.25': [], 'Angle 22.5': [], 'Angle 30': [],
                'abs_err': [],'rel_err': [],'sigma_1.25': [],'sigma_1.25^2': [],'sigma_1.25^3': []}
elif data == 'Taskonomy':
    baselines_2task = {'err': [], 'abs_err': [], 'cosine_similarity': [], 'key_err': [], 'edge_err': []}

metric_df = pd.DataFrame()

# For all two tasks
for two_task in set(itertools.combinations(tasks, 2)):
    print(two_task)

    metric_results = {}
    for task in two_task:
        for metric in metrics[task]:
            metric_results[metric] = []
            
    # For all branching points, get metrics results
    block = B
    for i in range(block+1):
        log = '_'.join(two_task) + '_b' + str(i) + '.stdout'
        if coarse:
            log = '2task_coarse_'+data+'_'+model+'/'+log
        else:
            log = '2task_fined_'+data+'_'+model+'/'+log
        # Read in content
        with open('./log/'+log) as f:
            lines = f.readlines()
            lines = [line.rstrip() for line in lines]
            if not confirm_complete(lines,iters):
                print(str(i) + ' not complete')
#                 continue
            avg_metric = extract_metric_results(lines, two_task, metrics, reduce_var_length)
            for task in two_task:
                for metric in metrics[task]:
                    metric_results[metric].append(avg_metric[metric])
    
    # Take down baselines
    for task in two_task:
        for metric in metrics[task]:
            baselines_2task[metric].append(metric_results[metric][0])
    
    # Compute relative performance for each task
    task_rel_perf = {}
    for task in two_task:
        temp = np.zeros((block + 1, 1))
        idx = 0
        for metric in metrics[task]:
            idx += 1
            temp += rel_perf(np.expand_dims(np.array(metric_results[metric]), axis=1), metrics_prop[metric])
        task_rel_perf[task] = temp/idx
        
    # Add results to dataframe
    task0_idx = tasks.index(two_task[0])
    task1_idx = tasks.index(two_task[1])
    col_name = '(' + str(task0_idx) + ', '+ str(task1_idx) + ')' + '-'
    idx = 0
    for task in two_task:
        metric_df[col_name+str(idx)] = np.squeeze(task_rel_perf[task])
        idx += 1

('segment_semantic', 'depth_zbuffer')
('normal', 'depth_zbuffer')
('segment_semantic', 'normal')


In [8]:
# Save to Excel
metric_df.to_excel("./2task/" + data + "_2task_metrics_" + model + "_" + date + "_val_acc.xlsx", index_label='branch')  

In [9]:
metric_df

Unnamed: 0,"(0, 2)-0","(0, 2)-1","(1, 2)-0","(1, 2)-1","(0, 1)-0","(0, 1)-1"
0,0.0,0.0,0.0,0.0,0.0,0.0
1,-2.022956,6.330822,-0.936176,-0.913969,1.657218,-1.361153
2,-2.875063,8.719301,-1.295447,-1.763708,2.546206,-1.979081
3,-3.10126,8.039148,-2.479126,-0.043601,1.056306,-0.709077
4,-2.597027,9.166514,-1.949955,0.962765,1.647703,0.002816
5,-4.130238,10.175742,-2.136591,-0.060026,2.391984,0.676699


# For Layout Results

In [10]:
def rel_perf_baselines(results, baselines, lower=True):
    # Function: Compute rel. perf. for layouts
    if lower:
        return (baselines - results)/baselines*100
    else:
        return (results - baselines)/baselines*100
    
def save_obj(obj, name):
    with open('./ntask/'+ name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    with open('./ntask/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [13]:
# Layout index for T=3, B=17/5, NYUv2
# layout_idx = [383,342,415,344,278,251,231,161,65,484,329,481,268,374,79,55,368] # verify_1014 + verify_1012(xiao)
# layout_idx = [0,55,72,86,103,200,227,239,349,376,485,493] # verify_1102 (coarse)
# layout_idx = [484,492,487,495,379,397,267,486,490,387] # verify_1102 (flops)
# layout_idx = [484,487] # verify_1116 (similar layouts)
# layout_idx = [0,47,43,34,41,30,44,35,36,49,17,28,26,39,38,10,11,48,42,23,9,33,14,21,5,4,2] # verify_1118 (similar layouts, w/ coarse bps)
# layout_idx = [0,45,49,37,34,50] # top-5

# Layout index for T=3, B=9/6/5, NYUv2
# layout_idx = [11,18,20,25,24,131,137,111,113,106,110,101,115,125,68,42,66,67,96,100,46,61,91,35,72,88,86,77,84] # verify_1214
# layout_idx = [8,12,14,19,62,65,59,61,67,58,53,28,45,38,1,48,25,23,37] # verify_0105
# layout_idx = [7,11,10,16,15,0,39,31,49,38,48,40,17,4,1,27,6] # verify_0111
# layout_idx = [0,7,11,10,9,8,2] # top-5
# layout_idx = [11,18,20,25,24,131,137,111,113,106,110,101,115,125,68,42,66,67,96,100,46,61,91,35,72,88,86,77,84] # verify_0116
# layout_idx = [0,47,45,46,43,30,7,37,41,35,9,17,19,25,23,49,50,12,21] # verify_0123 or 0124
layout_idx = [0,7,11,10,9,8,16,15,39,31,49,38,48,40,17,4,1,27,6,23] # verify_0221

# Layout index for T=5, B=17/5, Taskonomy
# layout_idx = [480,360,350,483,1043,
#               2260,2083,2218,2091,2207,5301,2247,2255,7466,5300,1570,2202,6273,3773,6020] # verify_0123
# layout_idx = [352,958,480,353,360,817,1,562,4697,6539,4] # under flops

# Layout index for T=5, B=5, Taskonomy
# layout_idx = [3221,3220,2947,3215,3261,3043,1203,3005,2531,626,1331] # verify_0216

base_2task = True # True: baseline from 2task baseline, False: baseline from policymtl
base_2layout = False # True: baseline from layout 2 (the last layout idx)

In [14]:
if base_2task:
    if data == 'Taskonomy':
        baselines = {'train_loss': [0.5483,0.1124,0.0234,0.0706,0.0711], 'val_loss': [0.6332,0.1023,0.0253,0.1065,0.0905]}
    elif data =='NYUv2':
        baselines = {'train_loss': [0.5333,0.0603,0.5743], 'val_loss': [1.609,0.0636,0.6785]}
    # baselines from 2task b0 - only has metrics no loss now
    for metric in baselines_2task:
        baselines[metric] = mean(baselines_2task[metric])
else:
    # baselines are from policymtl
    if data == 'NYUv2':
        baselines = {'train_loss': [0.5333, 0.0603, 0.5743], 'val_loss': [1.609, 0.0636, 0.6785],
                     'mIoU': 0.265, 'Pixel Acc': 0.582, 
                     'Angle Mean': 17.7, 'Angle Median': 16.3, 'Angle 11.25': 29.4, 'Angle 22.5': 72.3, 'Angle 30': 87.3,
                     'abs_err': 0.62,'rel_err': 0.24,'sigma_1.25': 57.8,'sigma_1.25^2': 85.8,'sigma_1.25^3': 96} 
    elif data == 'Taskonomy':
        baselines = {'train_loss': [0.5483,0.1124,0.0234,0.0706,0.0711], 'val_loss': [0.6332,0.1023,0.0253,0.1065,0.0905],
                     'err': 1.0096, 'abs_err': 0.0277, 'cosine_similarity': 0.7662, 'key_err': 0.2395, 'edge_err': 0.2681}

In [16]:
loss_results = {'train_loss': [], 'val_loss': []}
metric_results = {}
for task in tasks:
    for metric in metrics[task]:
        metric_results[metric] = []
        
# For each layout
for idx in layout_idx:
    log = 'layout_' + str(idx) + '.stdout'
    with open('./log/layout_'+data+'_'+model+'/'+log) as f:
        lines = f.readlines()
        lines = [line.rstrip() for line in lines]
        if not confirm_complete(lines,iters):
                print(str(idx) + ' not complete')
#                 continue
        avg_loss,_ = extract_loss_results(lines, tasks, reduce_var_length)
        avg_metric = extract_metric_results(lines, tasks, metrics, reduce_var_length)
        for mode in loss_results:
                loss_results[mode].append(avg_loss[mode])
        for task in tasks:
            for metric in metrics[task]:
                metric_results[metric].append(avg_metric[metric])
                
# Compute rel. perf.
for mode in loss_results:
    loss_results[mode] = rel_perf_baselines(np.array(loss_results[mode]), np.array(baselines[mode]))
    
task_rel_perf = {}
for task in tasks:
    temp = np.zeros(len(layout_idx))
    idx = 0
    for metric in metrics[task]:
        idx += 1
        if base_2layout:
            temp += rel_perf_baselines(np.array(metric_results[metric]), metric_results[metric][-1], metrics_prop[metric])
        else:
            temp += rel_perf_baselines(np.array(metric_results[metric]), baselines[metric], metrics_prop[metric])
    task_rel_perf[task] = temp/idx

In [17]:
for key in task_rel_perf:
    print(key)
    for value in task_rel_perf[key]:
        print(value)

segment_semantic
-0.8656516311456292
-2.0787258654850804
-1.3678982109153346
-0.8050266167726503
-1.6793946841728895
-1.2966804595973132
2.6339261289694944
1.2266564400037563
0.13675631770640406
-0.21142441764379294
-0.627407837035167
-0.9084003948776682
-2.7276239294244626
-0.15789650924340276
0.9038771600772911
1.020603954022858
-0.8097462603068619
0.308048624890791
-1.5076842791888057
-1.9026786334239154
normal
-0.13742845568777678
-1.9787690131353806
-1.0637272386154912
0.512172348803006
-0.5360136955050103
-1.9210850845426328
-0.5967654834563263
-0.9322384011726061
-0.49907924042938934
-1.1110117296529902
-1.1858074351001862
-0.9985554645879684
-0.8505645937433988
-1.3037542658426733
-2.3528849599315884
-2.4710921269314112
-2.8504323666095264
-1.559327762729813
-2.8015107956655525
0.0010868770458095866
depth_zbuffer
11.560826656392184
11.097546573054212
10.184360189919259
6.8651907563075785
4.563374483260416
2.9957251121445228
0.3931891118324125
-1.985898932539591
7.15105053787804

In [18]:
# Save all results to one dict
real_results = {}
real_results['layout'] = layout_idx

for mode in loss_results:
    real_results[mode] = loss_results[mode]
    
val_acc = None
for task in task_rel_perf:
    if val_acc is None:
        val_acc = np.expand_dims(task_rel_perf[task], axis=1)
    else:
        val_acc = np.concatenate((val_acc, np.expand_dims(task_rel_perf[task], axis=1)), axis=1)
real_results['val_acc'] = val_acc

In [19]:
real_results

{'layout': [0,
  7,
  11,
  10,
  9,
  8,
  16,
  15,
  39,
  31,
  49,
  38,
  48,
  40,
  17,
  4,
  1,
  27,
  6,
  23],
 'train_loss': array([[-75.14907182,   6.08623549,  14.08845551],
        [-65.11906994,   7.72802653,  15.49712694],
        [-66.59666229,   7.29684909,  15.43095943],
        [-67.74985937,   8.62354892,  13.45115793],
        [-68.58803675,   7.18076285,   9.57165245],
        [-76.72604538,   5.73797678,  -7.53961344],
        [-64.9728108 ,   4.59369818, -17.75552847],
        [-58.42302644,   6.03648425, -16.59585582],
        [-58.14925933,   5.2238806 ,  13.1655929 ],
        [-66.33977124,  10.33167496,  12.3715828 ],
        [-64.55653478,   5.5721393 ,  15.69562946],
        [-60.31876992,   7.18076285,  11.89273899],
        [-68.22801425,   6.53399668,  14.47153056],
        [-69.81248828,   0.14925373,  14.09716176],
        [-70.08063004,  -3.38308458,  -1.41737768],
        [-66.22538909,  -2.0066335 , -20.21417378],
        [-72.69829364,  -4.278

In [20]:
# Save to pickle
save_obj(real_results, 'real_results_'+data+'_'+model+'_'+date)

In [19]:
load_obj('real_results_'+data+'_'+model+'_'+date)

{'layout': [11,
  18,
  20,
  25,
  24,
  131,
  137,
  111,
  113,
  106,
  110,
  101,
  115,
  125,
  68,
  42,
  66,
  67,
  96,
  100,
  46,
  61,
  91,
  35,
  72,
  88,
  86,
  77,
  84],
 'train_loss': array([[-67.69548097,  10.58043118,  14.98519937],
        [-61.62947684,  10.58043118,  17.00156713],
        [-48.00112507,   5.73797678, -19.42016368],
        [-58.49615601,  10.29850746, -19.42016368],
        [-56.50478155,  13.25041459, -19.42016368],
        [-50.14250891,   6.65008292,  16.62197458],
        [-65.17719857,   6.53399668,  15.46230193],
        [-58.41740109,  -1.44278607,  14.7849556 ],
        [-60.61691356,   5.05804312,  14.07452551],
        [-48.57491093,   6.21890547,   3.32578792],
        [-50.39939996,   5.68822554,   2.50914156],
        [-66.64916557,  10.8291874 ,  15.13320564],
        [-63.56459779,  -0.8291874 ,  12.71635034],
        [-58.88618039,   7.47927032,  15.28295316],
        [-60.35252203,   9.66832504,  16.88838586],
        [-6