In [27]:
import flatbuffers
import numpy as np
import FlatBufTaskGraph.TaskGraph

In [61]:
buf = []
with open("/home/weiyangw/Documents/new_1024_8.fbuf", "rb") as f:
    buf = f.read()
tg = FlatBufTaskGraph.TaskGraph.TaskGraph.GetRootAs(buf, 0)

In [29]:
def get_all_routes(tg):
    result = {}
    for i in range(tg.RoutesLength()):
        r = tg.Routes(i)
        src, dst = r.Fromnode(), r.Tonode()
        for j in range(r.PathsLength()):
            p = r.Paths(j)
            path = [p.Hopnode(k) for k in range(p.HopnodeLength())]
            result[(src,dst)] = path
    return result
        

In [7]:
DEVICE_COMP_GPU = 0
DEVICE_COMP_CPU = 1
DEVICE_MEM_SYSTEM = 2
DEVICE_MEM_Z_COPY = 3
DEVICE_MEM_GPU_FB = 4
DEVICE_COMM_MEMBUS_COMM = 5
DEVICE_COMM_UPI_IN_COMM = 6
DEVICE_COMM_UPI_OUT_COMM = 7
DEVICE_COMM_NIC_IN_COMM = 8
DEVICE_COMM_NIC_OUT_COMM = 9
DEVICE_COMM_PCI_TO_HOST_COMM = 10
DEVICE_COMM_PCI_TO_DEV_COMM = 11
DEVICE_COMM_NVLINK_COMM = 12
DEVICE_COMM_NW_COMM = 13
DEVICE_COMM_NW_NOMINAL = 14
def get_dev(tg):
    result = {}
    for i in range(tg.DevicesLength()):
        d = tg.Devices(i)
        result[d.Deviceid()] = (d.Nodeid(), d.Type(), d.Deviceproperty(), d.Bandwidth())
    return result

def get_nwnominal(tg, devices):
    nnodes = tg.Nnode()
    nominal_comms = {}
    for devid, dev in devices.items():
        if dev[1] == DEVICE_COMM_NW_NOMINAL:
            src, dst = dev[2] // nnodes, dev[2] % nnodes
            nominal_comms[devid] = (src, dst)
    return nominal_comms

In [32]:
TASK_FORWARD = 0
TASK_BACKWARD = 1
TASK_COMM = 2
TASK_UPDATE = 3
TASK_BARRIER = 4
TASK_NOMINAL_COMM = 5
TASK_ALLREDUCE = 6
def get_tasks(tg):
    result = {}
    for i in range(tg.TasksLength()):
        t = tg.Tasks(i)
        nexttasks = [t.Nexttasks(j) for j in range(t.NexttasksLength())]
        result[t.Taskid()] = (t.Deviceid(), t.Runtime(), t.Xfersize(), t.Type(), nexttasks)
    return result

In [33]:
def get_ring(tg):
    rings = {}
    for i in range(tg.RingsLength()):
        r = tg.Rings(i)
        rsz = r.Ringsz()
        rings[rsz] = [list(r.Ringpaths(j).JumpsAsNumpy()) for j in range(r.RingpathsLength())]
    return rings

In [34]:
def get_logical_tm(tasks, rings, nom_devs, tg):
    nnodes = tg.Nnode()
    result = {} # (src, dst) -> traffic
    tot = 0
    totr = 0
    for tid, t in tasks.items():
        if t[3] == TASK_NOMINAL_COMM:
            ndev = nom_devs[t[0]]
            # print(ndev, t[2])
            if ndev in result:
                result[ndev] += t[2]
            else:
                result[ndev] = t[2]
            tot += t[2]
        # elif t[3] == TASK_ALLREDUCE:
        #     ringsz = len(t[-1])
        #     if ringsz == 1: 
        #         continue
        #     nsplit = len(rings[ringsz])
        #     totr += 2 * (len(t[-1]) - 1) * t[2]
        #     xfersize = 2 * (len(t[-1]) - 1) * t[2] / nsplit / len(t[-1])
        #     curr_node = t[-1][0]
        #     for rdesc in rings[ringsz]:
        #         total_hop = sum(rdesc)
        #         for j in range(len(t[-1])):
        #             ndev = (curr_node, (curr_node + total_hop) % nnodes)
        #             if ndev in result:
        #                 result[ndev] += xfersize
        #             else:
        #                 result[ndev] = xfersize
        #             curr_node = (curr_node + total_hop) % nnodes
    print(totr, tot)
    return result
            

In [35]:
def get_physical_tm(tasks, rings, nom_devs, routes, tg):
    nnodes = tg.Nnode()
    result = {}
    for tid, t in tasks.items():
        if t[3] == TASK_NOMINAL_COMM:
            ndev = nom_devs[t[0]]
            path = routes[ndev]
            for j in range(len(path) - 1):
                npdev = (path[j], path[(j + 1)])
                # print(npdev)
                if npdev in result:
                    result[npdev] += t[2]
                    # print(t, npdev, result[npdev])
                else:
                    result[npdev] = t[2]
        # elif t[3] == TASK_ALLREDUCE:
        #     ringsz = len(t[-1])
        #     if ringsz == 1: 
        #         continue
        #     nsplit = len(rings[ringsz])
        #     xfersize = 2 * (len(t[-1]) - 1) * t[2] / nsplit / len(t[-1])
        #     # print(t, nsplit, xfersize)
        #     curr_node = t[-1][0]
        #     for rdesc in rings[ringsz]:
        #         # print(rdesc)print(t[2])
        #         for j in range(len(t[-1])):
        #             for l in rdesc:
        #                 npdev = (curr_node, (curr_node + l) % nnodes)
        #                 # print(l, npdev)
        #                 if npdev in result:
        #                     result[npdev] += xfersize
        #                 else:
        #                     result[npdev] = xfersize
        #                 curr_node = (curr_node + l) % nnodes
    return result

In [43]:
def get_hop_to_traffic(tasks, rings, nom_devs, routes, tg):
    nnodes = tg.Nnode()
    result = {}
    for tid, t in tasks.items():
        if t[3] == TASK_NOMINAL_COMM:
            ndev = nom_devs[t[0]]
            path = routes[ndev]
            pathlen = len(path) - 1
            if pathlen in result:
                result[pathlen] += 1 # t[2]
            else:
                result[pathlen] = 1 #t[2]
        # elif t[3] == TASK_ALLREDUCE:
        #     ringsz = len(t[-1])
        #     if ringsz == 1: 
        #         continue
        #     nsplit = len(rings[ringsz])
        #     xfersize = 2 * (len(t[-1]) - 1) * t[2] / nsplit / len(t[-1])
        #     # print(xfersize)
        #     for rdesc in rings[ringsz]:
        #         hoplen = len(rdesc)
        #         for j in range(len(t[-1])):
        #             if hoplen in result:
        #                 result[hoplen] += xfersize
        #             else:
        #                 result[hoplen] = xfersize
    return result

In [41]:
tasks = get_tasks(tg)
rings = get_ring(tg)
devs = get_dev(tg)
ndevs = get_nwnominal(tg, devs)
routes = get_all_routes(tg)
r = get_hop_to_traffic(tasks, rings, ndevs, routes, tg)
a = sum([k * v for k, v in r.items() if k != 1]) / r[1]
print(a)

2.1809625895009903


In [44]:
all_route_cnt = {}
for v in routes.values():
    for i in range(len(v)-1):
        link = (v[i], v[i+1])
        if link in all_route_cnt:
            all_route_cnt[link] += 1
        else:
            all_route_cnt[link] = 1
distribution = [(v,k) for k, v in all_route_cnt.items()]
# sorted(distribution)

In [45]:
tasks = get_tasks(tg)
rings = get_ring(tg)
devs = get_dev(tg)
ndevs = get_nwnominal(tg, devs)
routes = get_all_routes(tg)
ptm = get_physical_tm(tasks, rings, ndevs,routes , tg)
ltm = get_logical_tm(tasks, rings, ndevs , tg)
# print(ptm)
# print(max([v for v in get_physical_tm(tasks, rings, ndevs,routes , tg).values() if v != 0]), min([v for v in get_physical_tm(tasks, rings, ndevs,routes , tg).values() if v != 0]))
# print(max([v for v in get_logical_tm(tasks, rings, ndevs, tg).values() if v != 0]), min([v for v in get_logical_tm(tasks, rings, ndevs , tg).values() if v != 0]))

0 136365211648


In [46]:
(sum(ptm.values())-sum(ltm.values()))/sum(ltm.values())

4.622047244094488

In [62]:
tasks = get_tasks(tg)
rings = get_ring(tg)
devs = get_dev(tg)
ndevs = get_nwnominal(tg, devs)
routes = get_all_routes(tg)
r = get_hop_to_traffic(tasks, rings, ndevs, routes, tg)

In [63]:
hops = {}
for k, v in routes.items():
    if len(v)-2 in hops:
        hops[len(v)-2] += 1
    else:
        hops[len(v)-2] = 1

hcdf = cdf(hops)
for i in range(2*len(hcdf)):
    print(f"{hcdf[(i+1)//2]}")

0.06299212598425197
0.2992125984251969
0.2992125984251969
0.7086614173228346
0.7086614173228346
0.968503937007874
0.968503937007874
1.0
1.0


IndexError: list index out of range

In [48]:
def cdf(m):
    n = 0
    res = []
    # total = sum(m.values())
    for i in sorted(m):
        n += m[i]
        res.append(n/total)
    
    return res


In [49]:
for i in [64, 128, 256, 512, 1024]:
    buf = []
    with open(f"/home/weiyangw/Documents/tg_d8_b{i}.fbuf", "rb") as f:
        buf = f.read()
    tg = FlatBufTaskGraph.TaskGraph.TaskGraph.GetRootAs(buf, 0)
    tasks = get_tasks(tg)
    rings = get_ring(tg)
    devs = get_dev(tg)
    ndevs = get_nwnominal(tg, devs)
    routes = get_all_routes(tg)
    r = get_hop_to_traffic(tasks, rings, ndevs, routes, tg)
    print(",".join([str(x) for x in cdf(r)]))

0.06299212598425197,0.2992125984251969,0.7086614173228346,0.968503937007874,1.0
0.06299212598425197,0.2992125984251969,0.7086614173228346,0.968503937007874,1.0
0.06666666666666667,0.30196078431372547,0.7098039215686275,0.9686274509803922,1.0
0.06457925636007827,0.3013698630136986,0.7103718199608611,0.9686888454011742,1.0
0.06457925636007827,0.3013698630136986,0.7103718199608611,0.9686888454011742,1.0


In [50]:
for i in [64, 128, 256, 512, 1024]:
    buf = []
    with open(f"/home/weiyangw/Documents/tg_d4_b{i}.fbuf", "rb") as f:
        buf = f.read()
    tg = FlatBufTaskGraph.TaskGraph.TaskGraph.GetRootAs(buf, 0)
    tasks = get_tasks(tg)
    rings = get_ring(tg)
    devs = get_dev(tg)
    ndevs = get_nwnominal(tg, devs)
    routes = get_all_routes(tg)
    r = get_hop_to_traffic(tasks, rings, ndevs, routes, tg)
    print(",".join([str(x) for x in cdf(r)]))

0.031496062992125984,0.09448818897637795,0.1889763779527559,0.31496062992125984,0.47244094488188976,0.6377952755905512,0.7795275590551181,0.889763779527559,0.968503937007874,1.0
0.031496062992125984,0.09448818897637795,0.1889763779527559,0.31496062992125984,0.47244094488188976,0.6377952755905512,0.7795275590551181,0.889763779527559,0.968503937007874,1.0
0.031496062992125984,0.09448818897637795,0.1889763779527559,0.31496062992125984,0.47244094488188976,0.6377952755905512,0.7795275590551181,0.889763779527559,0.968503937007874,1.0
0.03522504892367906,0.09980430528375733,0.19373776908023482,0.31898238747553814,0.4755381604696673,0.639921722113503,0.7808219178082192,0.8904109589041096,0.9686888454011742,1.0
0.03522504892367906,0.09980430528375733,0.19373776908023482,0.31898238747553814,0.4755381604696673,0.639921722113503,0.7808219178082192,0.8904109589041096,0.9686888454011742,1.0


In [26]:
for i in [64, 128, 256, 512, 1024]:
    buf = []
    with open(f"/home/weiyangw/Documents/tg_d8_b{i}.fbuf", "rb") as f:
        buf = f.read()
    tg = FlatBufTaskGraph.TaskGraph.TaskGraph.GetRootAs(buf, 0)
    tasks = get_tasks(tg)
    rings = get_ring(tg)
    devs = get_dev(tg)
    ndevs = get_nwnominal(tg, devs)
    routes = get_all_routes(tg)
    r = get_hop_to_traffic(tasks, rings, ndevs, routes, tg)
    print(",".join([str(x) for x in ccdf(r)]))

0.9774266761500128,1.9662348601235486,2.9789442105264823,3.9969649312470605,5.0
0.9559153884273898,1.9340583121014738,2.958879059793616,3.994072657267548,5.0
0.9099622198411313,1.8736848145083558,2.9212304180361097,3.988645826023223,5.0
0.83776034035679,1.7636626755830827,2.855860203240232,3.979223092358952,5.0
0.7254541413178842,1.5969374893065844,2.7541761610606095,3.9645659331258534,5.0
