In [9]:
import sys
sys.path.append('../')
import pandas as pd
import numpy as np
import re
import itertools
import collections
import pickle
import os
from statistics import mean, stdev
from scipy import stats, spatial
from ptflops import get_model_complexity_info

In [2]:
# Information for DomainNet
data = 'DomainNet'
iters = 40000
tasks = ('real', 'painting', 'quickdraw', 'clipart', 'infograph', 'sketch') # task 0, 1, 2, 3, 4, 5
reduce_var_length = {'val_acc': 3}

In [3]:
date = '1220'
model = 'effnetv2'
B = 4
coarse = True

## 2task val acc.

In [4]:
def confirm_complete(lines, iters):
    rev_lines = lines[::-1]
    for line in rev_lines:
        if 'Iter' in line:
            it = int(line.split(' ')[1])
            break
    return it == iters

def extract_metric_results(lines, tasks, reduce_var_length):
    # Function: Extract val metrics
    metric = 'Val Acc'
    metric_queue = {task: collections.deque(reduce_var_length['val_acc']*[0], reduce_var_length['val_acc'])
                    for task in tasks}
    
    for line in lines:
        for task in tasks:
            if task[:4] in line and metric in line:
                value = float(re.findall("\d+\.\d+", line.split(metric)[1])[0])
                metric_queue[task].append(value)
                
    avg_metric = {task: mean(metric_queue[task]) for task in tasks}
    return avg_metric

def rel_perf(results, lower=True):
    # Function: Compute rel. perf.
    if lower:
        return (results[0] - results)/results[0]*100
    else:
        return (results - results[0])/results[0]*100

In [5]:
baselines_2task = {task: [] for task in tasks}

metric_df = pd.DataFrame()

# For all two tasks
for two_task in set(itertools.combinations(tasks, 2)):
    print(two_task)
    metric_results = {task: [] for task in two_task}
            
    # For all branching points, get metrics results
    block = B
    for i in range(block+1):
        folder = '../log/'+'_'.join(['2task_coarse',data,model])+'/'+'_'.join(['2task',model,date])+'/'
        log = '_'.join(two_task) + '_b' + str(i) + '.stdout'
        # Read in content
        with open(folder+log) as f:
            lines = f.readlines()
            lines = [line.rstrip() for line in lines]
            if not confirm_complete(lines,iters):
                print(str(i) + ' not complete')
            avg_metric = extract_metric_results(lines, two_task, reduce_var_length)
            for task in two_task:
                metric_results[task].append(avg_metric[task])
                # Take down 2task baselines when branching point=0
                if i==0:
                    baselines_2task[task].append(avg_metric[task])     
    
    # Compute relative performance for each task
    task_rel_perf = {task: rel_perf(np.array(metric_results[task]), lower=False)
                     for task in two_task}
        
    # Add results to dataframe
    task0_idx = tasks.index(two_task[0])
    task1_idx = tasks.index(two_task[1])
    col_name = '(' + str(task0_idx) + ', '+ str(task1_idx) + ')' + '-'
    for idx, task in enumerate(two_task):
        metric_df[col_name+str(idx)] = task_rel_perf[task]

('clipart', 'sketch')
('real', 'painting')
('clipart', 'infograph')
('infograph', 'sketch')
('painting', 'sketch')
('painting', 'quickdraw')
('painting', 'infograph')
('painting', 'clipart')
('quickdraw', 'clipart')
('quickdraw', 'sketch')
('real', 'sketch')
('real', 'quickdraw')
('quickdraw', 'infograph')
('real', 'infograph')
('real', 'clipart')


In [6]:
metric_df

Unnamed: 0,"(3, 5)-0","(3, 5)-1","(0, 1)-0","(0, 1)-1","(3, 4)-0","(3, 4)-1","(4, 5)-0","(4, 5)-1","(1, 5)-0","(1, 5)-1",...,"(0, 5)-0","(0, 5)-1","(0, 2)-0","(0, 2)-1","(2, 4)-0","(2, 4)-1","(0, 4)-0","(0, 4)-1","(0, 3)-0","(0, 3)-1"
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-0.158591,-0.316,-0.21638,-0.232452,-0.368362,-0.42165,-2.611997,-0.195622,-1.303572,-0.612007,...,-0.647423,-0.004686,-3.20626,0.185586,-1.641167,-4.155209,-0.783634,-0.149869,-2.126704,-0.678752
2,0.225366,-0.241647,-0.244958,0.460346,-0.438724,0.413983,-1.275626,-0.097811,-0.571742,-0.28498,...,-1.237113,0.046856,-3.651685,-0.659862,-2.57822,-4.842652,-1.130083,-1.236418,-1.86954,0.111746
3,0.663578,-0.83647,0.0,0.232452,-0.326973,-1.07329,-1.860289,-1.537028,-0.164662,-0.1822,...,-1.703093,-0.506044,-4.530498,-4.943809,-8.66642,-11.831653,-0.519673,-2.038217,-1.791983,-0.049665
4,0.538375,-0.348529,-1.645301,0.683683,-2.446091,-5.021466,-2.741078,-4.177923,-1.372181,-1.149264,...,-2.239175,-0.707525,-10.766453,-15.486133,-20.244587,-22.609227,-2.470511,-2.203072,-1.943016,0.335237


In [20]:
# Save to Excel
metric_df.to_excel("../2task/" + data + "_2task_metrics_" + model + "_" + date + "_val_acc.xlsx", index_label='branch')  

## layout val acc.

In [7]:
def rel_perf_baselines(results, baselines, lower=True):
    # Function: Compute rel. perf. for layouts
    if lower:
        return (baselines - results)/baselines*100
    else:
        return (results - baselines)/baselines*100
    
def save_obj(obj, name):
    with open('../ntask/'+ name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    with open('../ntask/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [20]:
date = '1228'
folder = '../log/'+'_'.join(['layout',data,model])+'/verify_'+date+'/'
layout_idx = []
for filename in os.listdir(folder):
    if 'stdout' in filename:
        idx = int(filename.split('.')[0].split('_')[1])
        if idx != 5:
            layout_idx.append(idx)
layout_idx.append(5) # ind. layout

In [22]:
baselines = {task: mean(baselines_2task[task]) for task in tasks} # baseline from 2task baseline
base_2layout = False # True: baseline from ind. layout (the last layout idx)

In [28]:
metric_results = {task: [] for task in tasks}

# For each layout
incomplete = []
for idx in layout_idx:
    log = 'layout_' + str(idx) + '.stdout'
    with open(folder+log) as f:
        lines = f.readlines()
        lines = [line.rstrip() for line in lines]
        if not confirm_complete(lines,iters):
            print(str(idx) + ' not complete')
            incomplete.append(idx)
            continue
        avg_metric = extract_metric_results(lines, tasks, reduce_var_length)
        for task in tasks:
            metric_results[task].append(avg_metric[task])
                
# Compute rel. perf.
task_rel_perf = {}
for task in tasks:
    if base_2layout:
        task_rel_perf[task] = rel_perf_baselines(np.array(metric_results[task]), metric_results[task][-1], lower=False)
    else:
        task_rel_perf[task] = rel_perf_baselines(np.array(metric_results[task]), baselines[task], lower=False)

# show
for key in task_rel_perf:
    print(key, task_rel_perf[key])

10 not complete
10500 not complete
2500 not complete
3500 not complete
39500 not complete
40500 not complete
4500 not complete
5500 not complete
6500 not complete
7500 not complete
8500 not complete
9500 not complete
real [ -4.57323051   0.84308215   0.45503562  -4.00137246  -4.01362656
  -0.16992353 -13.02039082  -4.02996536  -5.04297105  -4.77746553
  -5.88850402 -14.20903862  -4.30772499  -3.73995164  -2.95977387
  -4.31997909  -1.47702765 -11.30073198  -2.93526567  -1.03179531
  -4.09940527  -2.1305797   -0.9501013   -2.53088033  -2.97202797
  -0.21485524  -3.2661264   -3.00470557  -0.9460166   -3.65417293
  -3.3314816   -0.83981439  -3.64191883  -1.92225998   0.06698909
   0.64701653  -0.46402196   0.12009019  -0.25570224  -0.65600288
  -0.57430887  -0.59064767  -2.55947324  -2.49003333  -1.18701392
  -0.54571597  -0.41092085  -3.68276583  -0.41909026  -0.9378472
  -1.18701392  -0.15766943  -1.68534736  -0.80713679  -1.04404941
  -2.61257434  -2.76779295  -0.81122149  -1.74253317 

In [33]:
# Save all results to one dict
real_results = {'layout': [idx for idx in layout_idx if idx not in incomplete], 'val_acc': []}
for item in zip(*list(task_rel_perf.values())):
    real_results['val_acc'].append(np.array(item))
real_results['val_acc'] = np.array(real_results['val_acc'])

In [34]:
# Save to pickle
save_obj(real_results, 'real_results_'+data+'_'+model+'_'+date)

In [35]:
load_obj('real_results_'+data+'_'+model+'_'+date)

{'layout': [0,
  100,
  1000,
  10000,
  10320,
  1100,
  11000,
  11100,
  11200,
  11300,
  11400,
  11500,
  11600,
  11700,
  11800,
  11900,
  1200,
  12000,
  12500,
  1300,
  13000,
  13500,
  1400,
  14000,
  14500,
  1500,
  15000,
  15500,
  1600,
  16000,
  16500,
  1700,
  17000,
  17500,
  17530,
  17535,
  17547,
  17550,
  1800,
  18000,
  18500,
  1900,
  19000,
  19500,
  200,
  2000,
  20000,
  21000,
  21100,
  21200,
  21300,
  21400,
  21500,
  21600,
  21700,
  21800,
  21900,
  22000,
  22500,
  23000,
  23500,
  24000,
  24500,
  25000,
  25500,
  26000,
  26500,
  27000,
  27500,
  28000,
  28500,
  29000,
  29500,
  300,
  3000,
  30000,
  30500,
  31000,
  31500,
  32000,
  32500,
  33000,
  33500,
  34000,
  34500,
  35000,
  35500,
  36000,
  36500,
  37000,
  37500,
  38000,
  38500,
  39000,
  400,
  4000,
  40000,
  500,
  5000,
  600,
  6000,
  700,
  7000,
  711,
  800,
  8000,
  900,
  9000,
  5],
 'val_acc': array([[-4.57323051e+00, -4.19792018e+00, 

## top layout flops

In [62]:
fined_B = 42
mapping = {0: [0, 1, 2, 3, 4, 5, 6], 1: [7, 8, 9, 10],
           2: [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25],
           3: [26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41], 4: [42]}
tasks = ['real', 'painting', 'quickdraw', 'clipart', 'infograph', 'sketch']
cls_num = {task: 345 for task in tasks}

In [75]:
layout_idx = [0,5,711,17535,17550,17530,17547] #top 5
# layout enumerate
with open('../ntask/DomainNetLayout.pkl', 'rb') as f:
    layout_list = pickle.load(f)

In [76]:
import torch
from main.efficientnet import EffNetV2_FC
from main.algorithms import coarse_to_fined

def prepare_input(resolution):
    x1 = torch.FloatTensor(1, *resolution)
    x2 = tasks[1]
    return dict(x = x1, task=x2)

for idx in layout_idx:
    layout = coarse_to_fined(layout_list[idx], fined_B, mapping)
    with torch.no_grad():
        model = EffNetV2_FC(tasks=tasks, layout=layout, cls_num=cls_num, verbose=False)
        macs, params = get_model_complexity_info(model, (3,224,224), input_constructor=prepare_input,
                                                 as_strings=True, print_per_layer_stat=False, verbose=False)
        print('Layout idx: '+str(idx))
        print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
        print('{:<30}  {:<8}'.format('Parameters: ', params))
        print('='*80)

Layout idx: 0
Computational complexity:       2.88 GMac
Parameters:                     22.83 M 
Layout idx: 5
Computational complexity:       2.88 GMac
Parameters:                     123.72 M
Layout idx: 711
Computational complexity:       2.88 GMac
Parameters:                     122.81 M
Layout idx: 17535
Computational complexity:       2.88 GMac
Parameters:                     98.25 M 
Layout idx: 17550
Computational complexity:       2.88 GMac
Parameters:                     117.53 M
Layout idx: 17530
Computational complexity:       2.88 GMac
Parameters:                     102.64 M
Layout idx: 17547
Computational complexity:       2.88 GMac
Parameters:                     121.91 M
