## Rules

### Hierarchy

- Models have three levels of complexity: easy, middle, hard.
   - The corresponding costs also have three levels: cheap, medium, expensive.
   - Within each level, accuracy is uniformly distributed. The higher the cost, the higher the accuracy.
   - Within each level, a model with cost C has an accuracy drawn from the range [C/avg(C) * 0.9, C/avg(C) * 0.98].

### Mapping

- Divide the models into two groups: cheap + medium; medium + expensive
- Divide the tasks into two groups as well
- Divide the models into smaller clusters: #model/3 (C+M); #model/5 (M+E)
- For each model in a small cluster, randomly assign up to 3 (C+M) / 5 (M+E) tasks.

### Selectivity & Cost

- Selectivity is a property of the task: assign a random selectivity to each task.
- Cost is a property of the model: assign each model a random cost within one of the three cost levels.

In [117]:
# Problem size: number of models (M) and number of tasks (N),
# also exposed under longer, descriptive aliases.
M, N = 100, 40
num_of_model, num_of_task = M, N

### Cost

In [118]:
import numpy as np
# Draw per-model costs for three tiers from uniform distributions.
# The first two tiers get M // 3 models each; the third tier takes the remainder.
c1 = np.random.uniform(low=5, high=10, size=M // 3)
# Only the middle tier is kept in ascending order.
c2 = np.random.uniform(low=15, high=30, size=M // 3)
c2.sort()
c3 = np.random.uniform(low=40, high=60, size=M - 2 * (M // 3))
c3

array([45.17831236, 56.6648742 , 47.7438239 , 49.76110426, 58.32268248,
       53.43867654, 54.02909846, 55.70625408, 45.68004382, 49.0602708 ,
       50.61061689, 52.6950475 , 56.88090088, 59.7917546 , 40.53143323,
       50.51627452, 46.18873121, 44.2246767 , 46.43517799, 50.67039286,
       52.39553081, 47.59916806, 57.13792076, 49.24537302, 49.35709192,
       56.21395764, 59.78037104, 48.45597992, 48.8735755 , 56.90844844,
       46.14463763, 50.44931839, 54.14401675, 40.42383536])

### Selectivity

In [119]:
# One selectivity value per task, drawn uniformly from [0.3, 0.9).
s = np.random.uniform(low=0.3, high=0.9, size=N)

### Mapping

In [120]:
# Build two model groups as plain Python lists of costs:
#   g1 = all of c1 plus the first len(c1)//2 entries of c2
#   g2 = the remaining entries of c2 plus all of c3
g1 = [*c1.tolist(), *c2[:len(c1) // 2].tolist()]
g2 = [*c2[len(c1) // 2:].tolist(), *c3.tolist()]
print(len(g1), len(g2))

49 51


In [131]:
import random
import statistics

# g1 is divided into #model//3 clusters [a,b,c],[d,e,f]
# g2 is divided into #model//5 clusters [m,n,o],[x,y,z]
def mapping(cluster,task_range,lower=0,size=3):
    """Assign random tasks and accuracies to every model of one cluster.

    Results are written into the module-level ``model_task_dict`` as
    ``{model_id: {task_id: accuracy}}``; both ids are stored as strings.

    Parameters
    ----------
    cluster : sequence of float
        Costs of the models in this cluster.
    task_range : sequence of int
        Task ids that models of this cluster may be assigned.
    lower : int, default 0
        Offset added to a model's position in ``cluster`` to form its id.
    size : int, default 3
        Maximum number of tasks assigned to a single model.

    Returns
    -------
    None
        The function only updates ``model_task_dict``.
    """
    if len(cluster) == 0:
        # Nothing to map; also avoids max() failing on an empty cluster.
        return
    # Accuracies are scaled by each model's cost relative to the most
    # expensive model in the cluster (the maximum, not the average).
    max_cost = max(cluster)
    candidate_tasks = list(task_range)
    for idx, cost in enumerate(cluster):
        # Between 1 and `size` tasks, capped by how many tasks are available.
        count = min(random.randint(1, size), len(candidate_tasks))
        accuracies = np.random.uniform(0.93 * cost / max_cost, 0.98 * cost / max_cost, count)
        # Sample WITHOUT replacement: drawing with replacement could repeat a
        # task id, and repeated keys would collapse in the dict below so the
        # model would silently receive fewer than `count` tasks.
        tasks = random.sample(candidate_tasks, count)
        model_task_dict[str(idx + lower)] = {
            str(task): accuracy for task, accuracy in zip(tasks, accuracies)
        }

def split(a, n):
    """Lazily cut ``a`` into ``n`` consecutive slices of near-equal length.

    The first ``len(a) % n`` slices hold one extra element. Returns a
    generator that yields the slices in order.
    """
    base, extra = divmod(len(a), n)

    def bounds(part):
        # Parts before `extra` are one element longer than the rest.
        start = part * base + min(part, extra)
        stop = start + base + (1 if part < extra else 0)
        return start, stop

    return (a[start:stop] for start, stop in map(bounds, range(n)))

In [132]:
# Rebuild the model -> {task: accuracy} mapping from scratch.
model_task_dict = {}
l = [g1, g2]
# Running model id offset, so ids continue across clusters and groups.
count = 0
for i in range(2):
    # `size` is used both as the number of clusters per group and as the
    # maximum number of tasks per model passed to mapping().
    size = 4
    # The first model group gets the first half of the tasks, the second group the rest.
    task_idx, task_end = (0, N // 2) if i == 0 else (N // 2, N)
    task_range = range(task_idx, task_end)
    print(task_range)
    task_range_sub = list(split(task_range, size))
    print(task_range_sub)
    model_range_sub = list(split(l[i], size))
    for j in range(size):
        print((j + 1) * (len(task_range) // size))
        mapping(model_range_sub[j], task_range_sub[j], count, size)
        count += len(model_range_sub[j])

range(0, 20)
[range(0, 5), range(5, 10), range(10, 15), range(15, 20)]
5
10
15
20
range(20, 40)
[range(20, 25), range(25, 30), range(30, 35), range(35, 40)]
5
10
15
20


In [133]:
# Show the nested mapping: model id -> {task id: accuracy}.
model_task_dict

{'0': {'1': 0.8219084240197865},
 '1': {'4': 0.574001209706049},
 '2': {'3': 0.9264150373509622},
 '3': {'0': 0.6122621302398744, '3': 0.6079324289834667},
 '4': {'2': 0.7372862379262736, '4': 0.7397317989166851},
 '5': {'4': 0.9009935277961709,
  '2': 0.8857062553815764,
  '3': 0.8687414472872008},
 '6': {'3': 0.8702386848380604, '2': 0.830764726520044},
 '7': {'0': 0.58653684273254},
 '8': {'2': 0.9373450187965956,
  '4': 0.9771815889168156,
  '0': 0.9470996663218467},
 '9': {'0': 0.8440919988442565,
  '3': 0.8307999693645663,
  '2': 0.8239880041094723,
  '4': 0.8182963075921207},
 '10': {'0': 0.5674526011265874, '2': 0.5561740559712278},
 '11': {'0': 0.7399386766867744,
  '1': 0.7388285039024215,
  '2': 0.7360559024775157},
 '12': {'2': 0.7889176068739172},
 '13': {'6': 0.5633127938710234, '5': 0.5633554094542121},
 '14': {'6': 0.9530163841462732},
 '15': {'6': 0.8152564173454192},
 '16': {'9': 0.7932234892120443, '7': 0.8059802627649976},
 '17': {'7': 0.6776391455347142},
 '18': {'

## Get table from dict

In [134]:
import pandas as pd
# Tabulate the nested dict: outer keys (model ids) become rows,
# inner keys (task ids) become columns.
df = pd.DataFrame.from_dict(data=model_task_dict, orient='index')

In [135]:
# Display the model-by-task accuracy table; cells are empty where a model was not assigned the task.
df

Unnamed: 0,1,4,3,0,2,6,5,9,7,8,...,33,30,31,32,34,35,36,37,39,38
0,0.821908,,,,,,,,,,...,,,,,,,,,,
11,0.738829,,,0.739939,0.736056,,,,,,...,,,,,,,,,,
1,,0.574001,,,,,,,,,...,,,,,,,,,,
4,,0.739732,,,0.737286,,,,,,...,,,,,,,,,,
5,,0.900994,0.868741,,0.885706,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,,,,,,,,,,,...,,,,,,,,0.933995,,
94,,,,,,,,,,,...,,,,,,,,0.781976,0.766519,
97,,,,,,,,,,,...,,,,,,,,0.789394,,
91,,,,,,,,,,,...,,,,,,,,,0.912686,
