In [1]:
import pandas as pd
import numpy as np
import re
import itertools
import collections
import pickle
from statistics import mean, stdev
from scipy import stats, spatial

In [2]:
# Information for NYUv2
data = 'NYUv2'
iters = 20000 # 20000 for large, 40000 for small
tasks = ('segment_semantic','normal','depth_zbuffer') # task 0, 1, 2
metrics = {'segment_semantic': ['mIoU', 'Pixel Acc'],
           'normal': ['Angle Mean', 'Angle Median', 'Angle 11.25', 'Angle 22.5', 'Angle 30'],
           'depth_zbuffer': ['abs_err','rel_err','sigma_1.25','sigma_1.25^2','sigma_1.25^3']}
metrics_prop = {'mIoU': False, 'Pixel Acc': False, 
                'Angle Mean': True, 'Angle Median': True, 'Angle 11.25': False, 'Angle 22.5': False, 'Angle 30': False,
                'abs_err': True,'rel_err': True,'sigma_1.25': False,'sigma_1.25^2': False,'sigma_1.25^3': False} # True: the lower the better
reduce_var_length = {'train_loss': 10, 'val_loss': 3, 'val_acc': 3}

In [46]:
# Information for Taskonomy
data = 'Taskonomy'
iters = 50000
tasks = ('segment_semantic','normal','depth_zbuffer','keypoints2d','edge_texture') # task 0, 1, 2, 3, 4
metrics = {'segment_semantic': ['err'],'normal': ['cosine_similarity'], 'depth_zbuffer': ['abs_err'],
           'keypoints2d': ['key_err'],'edge_texture': ['edge_err']}
metrics_prop = {'err': True, 'abs_err': True, 'cosine_similarity': False, 'key_err': True, 'edge_err': True} # True: the lower the better
reduce_var_length = {'train_loss': 10, 'val_loss': 3, 'val_acc': 3}

In [47]:
date = '0123'
model = 'resnet'
B = 5 # 17 - fined, 5 - coarse
coarse = True

In [27]:
date = '0203'
model = 'mobilenet'
B = 5 # 9 - coarse v1/v4 with init weights, 6 - coarse v2, 5 - coarse v3/v5 with init weights, 8 - short network
coarse = True

In [48]:
def confirm_complete(lines, iters):
    rev_lines = lines[::-1]
    for line in rev_lines:
        if 'Iter' in line:
            it = int(line.split(' ')[1])
            break
    if it != iters:
        return False
    else:
        return True

def extract_loss_results(lines, tasks, reduce_var_length):
    # Function: Extract train loss / val loss 
    train_loss = {}
    val_loss = {}
    for task in tasks:
        train_loss[task] = collections.deque(reduce_var_length['train_loss']*[0], reduce_var_length['train_loss'])
        val_loss[task] = collections.deque(reduce_var_length['val_loss']*[0], reduce_var_length['val_loss'])
    
    for line in lines:
        for task in tasks:
            if task[:4] in line and 'Train Loss' in line:
                loss = float(line.split(': ')[1])
                train_loss[task].append(loss)
            elif task[:4] in line and 'Val Loss' in line:
                loss = float(line.split(': ')[1])
                val_loss[task].append(loss)
                
    avg_loss = {'train_loss': [], 'val_loss': []}
    std_loss = {'train_loss': [], 'val_loss': []}
    for task in tasks:
        avg_loss['train_loss'].append(mean(train_loss[task]))
        avg_loss['val_loss'].append(mean(val_loss[task]))
        
        std_loss['train_loss'].append(stdev(train_loss[task]))
        std_loss['val_loss'].append(stdev(val_loss[task]))
    return avg_loss, std_loss

def extract_metric_results(lines, tasks, metrics, reduce_var_length):
    # Function: Extract val metrics
    metric_queue = {}
    for task in tasks:
        for metric in metrics[task]:
            metric_queue[metric] = collections.deque(reduce_var_length['val_acc']*[0], reduce_var_length['val_acc'])
    
    for line in lines:
        for task in tasks:
            for metric in metrics[task]:
                if "'"+metric in line:
                    value = float(re.findall("\d+\.\d+", line.split(metric)[1])[0])
                    metric_queue[metric].append(value)
                
    avg_metric = {}
    for task in tasks:
        for metric in metrics[task]:
            avg_metric[metric] = (mean(metric_queue[metric]))
    return avg_metric

def rel_perf(results, lower=True):
    # Function: Compute rel. perf.
    if lower:
        return (results[0,:] - results)/results[0,:]*100
    else:
        return (results - results[0,:])/results[0,:]*100

# For 2-Task Loss

In [29]:
loss_df = {'train_loss': pd.DataFrame(), 'val_loss': pd.DataFrame()}

# For all two tasks
for two_task in set(itertools.combinations(tasks, 2)):
    print(two_task)
    
    # For all branching points
    loss_results = {'train_loss': [], 'val_loss': []}
    loss_std  = {'train_loss': [], 'val_loss': []}
    block = B
    for i in range(block+1):
        log = '_'.join(two_task) + '_b' + str(i) + '.stdout'
        if coarse:
            log = '2task_coarse_'+data+'_'+model+'/'+log
        else:
            log = '2task_fined_'+data+'_'+model+'/'+log
        # Read in content
        with open('./log/'+log) as f:
            lines = f.readlines()
            lines = [line.rstrip() for line in lines]
            if not confirm_complete(lines,iters):
                print(str(i) + ' not complete')
#                 continue
            avg_loss, std_loss = extract_loss_results(lines, two_task, reduce_var_length)
            for mode in loss_results:
                loss_results[mode].append(avg_loss[mode])
                loss_std[mode].append(std_loss[mode])
    
    # Compute loss std 
    # 1. avg of std of loss in one 2-task model
    # 2. std of loss in multiple 2-task models with different b:
    for mode in loss_results:
        print(mode)
        single_temp = np.array(loss_std[mode])
        multiple_temp = np.array(loss_results[mode])
        for i in range(single_temp.shape[1]):
            print('task '+str(i))
            print('single '+str(mean(single_temp[:,i])))
            print('multiple '+str(stdev(multiple_temp[:,i])))
        print('='*80)
           
    # Add results to dataframe
    task0_idx = tasks.index(two_task[0])
    task1_idx = tasks.index(two_task[1])
    col_name = '(' + str(task0_idx) + ', '+ str(task1_idx) + ')' + '-'
    for mode in loss_df:
        rel_mode_loss = rel_perf(np.array(loss_results[mode]))
        for idx in range(2):
            loss_df[mode][col_name+str(idx)] = rel_mode_loss[:,idx]

('segment_semantic', 'normal')
5 not complete
train_loss
task 0
single 0.022699247081909416
multiple 0.019870215650566042
task 1
single 0.001668331637264607
multiple 0.032190911553832496
val_loss
task 0
single 0.0022007006389264216
multiple 0.011031720258049148
task 1
single 0.00020947276523993402
multiple 0.029058543717174867
('normal', 'edge_texture')
train_loss
task 0
single 0.0015610575531589364
multiple 0.00274819516531608
task 1
single 0.000940429864168775
multiple 0.004218277689610612
val_loss
task 0
single 0.00012872796090597607
multiple 0.002083524435679617
task 1
single 0.00046805113774127296
multiple 0.0014375004025764305
('segment_semantic', 'depth_zbuffer')
train_loss
task 0
single 0.024175241524813092
multiple 0.012986458203323448
task 1
single 0.00032273789716875527
multiple 0.0043197750713048315
val_loss
task 0
single 0.0023236675051581616
multiple 0.007576796938802822
task 1
single 0.00018888670844770036
multiple 0.004339965010953003
('segment_semantic', 'keypoints2d')

In [30]:
# Save to Excel
for mode in loss_df:
    loss_df[mode].to_excel("./2task/"+ data + "_2task_metrics_" + model + "_" + date + "_" + mode + ".xlsx", index_label='branch')  

In [31]:
loss_df

{'train_loss':     (0, 1)-0   (0, 1)-1  (1, 4)-0   (1, 4)-1  (0, 2)-0   (0, 2)-1  (0, 3)-0  \
 0   0.000000   0.000000  0.000000   0.000000  0.000000   0.000000  0.000000   
 1  -1.788667  -8.997792 -0.603041  -0.052253 -3.866885  -5.253456 -6.617167   
 2   3.988598 -23.258278 -3.347317  -5.029393 -6.616476 -13.133641 -2.691286   
 3  -0.054400 -36.821192 -3.976578  -9.235794  0.619749 -19.861751 -2.909028   
 4  10.044390 -63.205298 -5.235099 -11.626388 -0.539007 -43.225806 -3.294430   
 5   0.480895 -69.818985 -5.864359 -12.175049 -3.775232 -48.156682 -0.792579   
 
     (0, 3)-1    (2, 4)-0    (2, 4)-1  (1, 3)-0   (1, 3)-1  (3, 4)-0  (3, 4)-1  \
 0   0.000000    0.000000    0.000000  0.000000   0.000000  0.000000  0.000000   
 1  -8.954496 -102.279982 -237.990519 -1.471228  -0.011996 -0.145684 -1.198314   
 2 -31.999512  -14.181487   -3.239400 -6.990511  -4.162668 -1.129052 -3.937319   
 3 -40.600220  -19.425445   -1.922570 -7.982937  -5.422265 -2.209542 -4.490387   
 4 -66.890326 

# For 2-Task Metrics

In [32]:
if data == 'NYUv2':
    baselines_2task = {'mIoU': [], 'Pixel Acc': [], 
                'Angle Mean': [], 'Angle Median': [], 'Angle 11.25': [], 'Angle 22.5': [], 'Angle 30': [],
                'abs_err': [],'rel_err': [],'sigma_1.25': [],'sigma_1.25^2': [],'sigma_1.25^3': []}
elif data == 'Taskonomy':
    baselines_2task = {'err': [], 'abs_err': [], 'cosine_similarity': [], 'key_err': [], 'edge_err': []}

metric_df = pd.DataFrame()

# For all two tasks
for two_task in set(itertools.combinations(tasks, 2)):
    print(two_task)

    metric_results = {}
    for task in two_task:
        for metric in metrics[task]:
            metric_results[metric] = []
            
    # For all branching points, get metrics results
    block = B
    for i in range(block+1):
        log = '_'.join(two_task) + '_b' + str(i) + '.stdout'
        if coarse:
            log = '2task_coarse_'+data+'_'+model+'/'+log
        else:
            log = '2task_fined_'+data+'_'+model+'/'+log
        # Read in content
        with open('./log/'+log) as f:
            lines = f.readlines()
            lines = [line.rstrip() for line in lines]
            if not confirm_complete(lines,iters):
                print(str(i) + ' not complete')
#                 continue
            avg_metric = extract_metric_results(lines, two_task, metrics, reduce_var_length)
            for task in two_task:
                for metric in metrics[task]:
                    metric_results[metric].append(avg_metric[metric])
    
    # Take down baselines
    for task in two_task:
        for metric in metrics[task]:
            baselines_2task[metric].append(metric_results[metric][0])
    
    # Compute relative performance for each task
    task_rel_perf = {}
    for task in two_task:
        temp = np.zeros((block + 1, 1))
        idx = 0
        for metric in metrics[task]:
            idx += 1
            temp += rel_perf(np.expand_dims(np.array(metric_results[metric]), axis=1), metrics_prop[metric])
        task_rel_perf[task] = temp/idx
        
    # Add results to dataframe
    task0_idx = tasks.index(two_task[0])
    task1_idx = tasks.index(two_task[1])
    col_name = '(' + str(task0_idx) + ', '+ str(task1_idx) + ')' + '-'
    idx = 0
    for task in two_task:
        metric_df[col_name+str(idx)] = np.squeeze(task_rel_perf[task])
        idx += 1

('segment_semantic', 'normal')
5 not complete
('normal', 'edge_texture')
('segment_semantic', 'depth_zbuffer')
('segment_semantic', 'keypoints2d')
('depth_zbuffer', 'edge_texture')
('normal', 'keypoints2d')
('keypoints2d', 'edge_texture')
('normal', 'depth_zbuffer')
('segment_semantic', 'edge_texture')
('depth_zbuffer', 'keypoints2d')


In [34]:
# Save to Excel
metric_df.to_excel("./2task/" + data + "_2task_metrics_" + model + "_" + date + "_val_acc.xlsx", index_label='branch')  

In [33]:
metric_df

Unnamed: 0,"(0, 1)-0","(0, 1)-1","(1, 4)-0","(1, 4)-1","(0, 2)-0","(0, 2)-1","(0, 3)-0","(0, 3)-1","(2, 4)-0","(2, 4)-1","(1, 3)-0","(1, 3)-1","(3, 4)-0","(3, 4)-1","(1, 2)-0","(1, 2)-1","(0, 4)-0","(0, 4)-1","(2, 3)-0","(2, 3)-1"
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3.352925,0.187127,0.091344,0.763262,1.693018,-1.132075,0.968835,0.6498,-75.449871,8.270194,0.160891,1.908186,1.354527,1.065879,0.030447,-0.380228,-2.500601,-0.356143,-5.398458,-2.133932
2,3.222682,0.043518,0.200087,0.954077,0.980525,-7.924528,0.873984,-0.096779,-11.311054,0.128419,0.191329,2.032633,1.94886,-0.06421,-0.178331,-0.887199,-1.882321,0.572373,-12.85347,-4.703075
3,3.112477,-0.091388,0.15659,2.22618,-1.652304,-15.471698,-1.886856,4.009401,-12.982005,0.757673,0.160891,2.903761,1.271596,-0.115577,-0.030447,-2.534854,-1.033902,1.399135,-18.637532,-3.116664
4,1.051964,-0.691936,0.187038,2.976721,1.45552,-22.893082,-0.203252,6.000277,-14.267352,0.847566,0.100013,2.654867,-4.381479,0.179787,-0.047845,-4.816223,-2.339161,6.092597,-21.722365,-0.898498
5,1.3926,-1.04008,0.108743,4.33787,0.332496,-24.90566,1.060298,7.258399,-17.737789,-2.529857,0.200026,5.129978,-3.41396,-0.33389,-0.017398,-7.604563,-2.631127,5.443907,-22.493573,-5.812158


# For Layout Results

In [5]:
def rel_perf_baselines(results, baselines, lower=True):
    # Function: Compute rel. perf. for layouts
    if lower:
        return (baselines - results)/baselines*100
    else:
        return (results - baselines)/baselines*100
    
def save_obj(obj, name):
    with open('./ntask/'+ name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    with open('./ntask/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [37]:
# Layout index for T=3, B=17/5, NYUv2
# layout_idx = [383,342,415,344,278,251,231,161,65,484,329,481,268,374,79,55,368] # verify_1014 + verify_1012(xiao)
# layout_idx = [0,55,72,86,103,200,227,239,349,376,485,493] # verify_1102 (coarse)
# layout_idx = [484,492,487,495,379,397,267,486,490,387] # verify_1102 (flops)
# layout_idx = [484,487] # verify_1116 (similar layouts)
# layout_idx = [0,47,43,34,41,30,44,35,36,49,17,28,26,39,38,10,11,48,42,23,9,33,14,21,5,4,2] # verify_1118 (similar layouts, w/ coarse bps)
# layout_idx = [0,45,49,37,34,50] # top-5

# Layout index for T=3, B=9/6/5, NYUv2
# layout_idx = [11,18,20,25,24,131,137,111,113,106,110,101,115,125,68,42,66,67,96,100,46,61,91,35,72,88,86,77,84] # verify_1214
# layout_idx = [8,12,14,19,62,65,59,61,67,58,53,28,45,38,1,48,25,23,37] # verify_0105
# layout_idx = [7,11,10,16,15,0,39,31,49,38,48,40,17,4,1,27,6] # verify_0111
# layout_idx = [0,7,11,10,9,8,2] # top-5
# layout_idx = [11,18,20,25,24,131,137,111,113,106,110,101,115,125,68,42,66,67,96,100,46,61,91,35,72,88,86,77,84] # verify_0116
# layout_idx = [0,47,45,46,43,30,7,37,41,35,9,17,19,25,23,49,50,12,21] # verify_0123 or 0124

# Layout index for T=5, B=17/5, Taskonomy
# layout_idx = [480,360,350,483,1043,
#               2260,2083,2218,2091,2207,5301,2247,2255,7466,5300,1570,2202,6273,3773,6020] # verify_0123
layout_idx = [352,958,480,353,360,817,1,562,4697,6539,4] # under flops

base_2task = False # True: baseline from 2task baseline, False: baseline from policymtl
base_2layout = False # True: baseline from layout 2 (the last layout idx)

In [38]:
if base_2task:
    if data == 'Taskonomy':
        baselines = {'train_loss': [0.5483,0.1124,0.0234,0.0706,0.0711], 'val_loss': [0.6332,0.1023,0.0253,0.1065,0.0905]}
    elif data =='NYUv2':
        baselines = {'train_loss': [0.5333,0.0603,0.5743], 'val_loss': [1.609,0.0636,0.6785]}
    # baselines from 2task b0 - only has metrics no loss now
    for metric in baselines_2task:
        baselines[metric] = mean(baselines_2task[metric])
else:
    # baselines are from policymtl
    if data == 'NYUv2':
        baselines = {'train_loss': [0.5333, 0.0603, 0.5743], 'val_loss': [1.609, 0.0636, 0.6785],
                     'mIoU': 0.265, 'Pixel Acc': 0.582, 
                     'Angle Mean': 17.7, 'Angle Median': 16.3, 'Angle 11.25': 29.4, 'Angle 22.5': 72.3, 'Angle 30': 87.3,
                     'abs_err': 0.62,'rel_err': 0.24,'sigma_1.25': 57.8,'sigma_1.25^2': 85.8,'sigma_1.25^3': 96} 
    elif data == 'Taskonomy':
        baselines = {'train_loss': [0.5483,0.1124,0.0234,0.0706,0.0711], 'val_loss': [0.6332,0.1023,0.0253,0.1065,0.0905],
                     'err': 0.6226, 'abs_err': 0.022, 'cosine_similarity': 0.807, 'key_err': 0.2024, 'edge_err': 0.214}

In [51]:
loss_results = {'train_loss': [], 'val_loss': []}
metric_results = {}
for task in tasks:
    for metric in metrics[task]:
        metric_results[metric] = []
        
# For each layout
for idx in layout_idx:
    log = 'layout_' + str(idx) + '.stdout'
    with open('./log/layout_'+data+'_'+model+'/'+log) as f:
        lines = f.readlines()
        lines = [line.rstrip() for line in lines]
        if not confirm_complete(lines,iters):
                print(str(idx) + ' not complete')
#                 continue
        avg_loss,_ = extract_loss_results(lines, tasks, reduce_var_length)
        avg_metric = extract_metric_results(lines, tasks, metrics, reduce_var_length)
        for mode in loss_results:
                loss_results[mode].append(avg_loss[mode])
        for task in tasks:
            for metric in metrics[task]:
                metric_results[metric].append(avg_metric[metric])
                
# Compute rel. perf.
for mode in loss_results:
    loss_results[mode] = rel_perf_baselines(np.array(loss_results[mode]), np.array(baselines[mode]))
    
task_rel_perf = {}
for task in tasks:
    temp = np.zeros(len(layout_idx))
    idx = 0
    for metric in metrics[task]:
        idx += 1
        if base_2layout:
            temp += rel_perf_baselines(np.array(metric_results[metric]), metric_results[metric][-1], metrics_prop[metric])
        else:
            temp += rel_perf_baselines(np.array(metric_results[metric]), baselines[metric], metrics_prop[metric])
    task_rel_perf[task] = temp/idx

4 not complete


In [41]:
for key in task_rel_perf:
    print(key)
    for value in task_rel_perf[key]:
        print(value)

segment_semantic
0.872684441589046
3.2605203983296005
0.8887461184281062
0.8887461184281062
0.8459149801906124
4.754256344362362
0.5728664739265528
0.05889281507657201
4.19745154727487
5.284291680051403
0.567512581646866
normal
8.331268071045013
8.343659644774874
8.17430813713341
8.339529120198268
8.199091284593134
8.08343659644774
7.707558859975207
7.980173482032206
8.211482858322997
7.856257744733582
8.141263940520444
depth_zbuffer
8.787878787878782
7.87878787878788
3.9393939393939315
10.757575757575756
7.727272727272728
8.03030303030303
6.969696969696959
7.727272727272728
7.272727272727261
1.6666666666666565
0.1515151515151506
keypoints2d
1.6139657444005275
4.891304347826083
0.3129117259551972
0.9552042160737859
0.9552042160737859
5.237154150197627
0.559947299077727
2.4868247694334658
1.7786561264822094
3.0797101449275397
0.14822134387351518
edge_texture
1.1526479750778715
-1.355140186915894
-0.34267912772585785
2.6791277258567012
0.7165109034267902
1.5576323987538911
-0.48286604361

In [14]:
# Save all results to one dict
real_results = {}
real_results['layout'] = layout_idx

for mode in loss_results:
    real_results[mode] = loss_results[mode]
    
val_acc = None
for task in task_rel_perf:
    if val_acc is None:
        val_acc = np.expand_dims(task_rel_perf[task], axis=1)
    else:
        val_acc = np.concatenate((val_acc, np.expand_dims(task_rel_perf[task], axis=1)), axis=1)
real_results['val_acc'] = val_acc

In [15]:
real_results

{'layout': [0,
  47,
  45,
  46,
  43,
  30,
  7,
  37,
  41,
  35,
  9,
  17,
  19,
  25,
  23,
  49,
  50,
  12,
  21],
 'train_loss': array([[-33.41271329,  16.10281924,  24.8302281 ],
        [-31.85449091,  11.89054726,  25.93766324],
        [-30.09750609,  13.23383085,  24.32004179],
        [-30.99381211,   9.15422886,  24.17900052],
        [-38.67241703,   9.03814262,  24.48720181],
        [-36.60978811,  -0.94527363,  21.51140519],
        [-45.21845115,  12.66998342,  22.65192408],
        [-34.88280518,  12.18905473,  20.74525509],
        [-37.88111757,   8.95522388,  23.77154797],
        [-40.82692668,  12.3880597 ,  21.92234024],
        [ -5.97412338,  -1.07794362,   1.65070521],
        [-34.96531033,  -2.43781095,   4.64043183],
        [-39.75248453,   1.32669983,  -0.88107261],
        [-44.37464842,  12.33830846,  23.24917291],
        [-37.67297956,  12.68656716,  18.52167857],
        [-34.9615601 ,   9.51907131,  25.15061814],
        [-23.50646915,  16.99834

In [16]:
# Save to pickle
save_obj(real_results, 'real_results_'+data+'_'+model+'_'+date)

In [19]:
load_obj('real_results_'+data+'_'+model+'_'+date)

{'layout': [11,
  18,
  20,
  25,
  24,
  131,
  137,
  111,
  113,
  106,
  110,
  101,
  115,
  125,
  68,
  42,
  66,
  67,
  96,
  100,
  46,
  61,
  91,
  35,
  72,
  88,
  86,
  77,
  84],
 'train_loss': array([[-67.69548097,  10.58043118,  14.98519937],
        [-61.62947684,  10.58043118,  17.00156713],
        [-48.00112507,   5.73797678, -19.42016368],
        [-58.49615601,  10.29850746, -19.42016368],
        [-56.50478155,  13.25041459, -19.42016368],
        [-50.14250891,   6.65008292,  16.62197458],
        [-65.17719857,   6.53399668,  15.46230193],
        [-58.41740109,  -1.44278607,  14.7849556 ],
        [-60.61691356,   5.05804312,  14.07452551],
        [-48.57491093,   6.21890547,   3.32578792],
        [-50.39939996,   5.68822554,   2.50914156],
        [-66.64916557,  10.8291874 ,  15.13320564],
        [-63.56459779,  -0.8291874 ,  12.71635034],
        [-58.88618039,   7.47927032,  15.28295316],
        [-60.35252203,   9.66832504,  16.88838586],
        [-6