In [1]:
import flatbuffers
import numpy as np

In [2]:
import FlatBufTaskGraph.TaskGraph

In [3]:
# Read the serialized task graph as raw bytes; the accessor created in the
# next cell is handed this buffer directly.
with open("taskgraph.fbuf", "rb") as fbuf_file:
    buf = fbuf_file.read()

In [4]:
# Obtain the TaskGraph root accessor over the raw buffer, starting at offset 0.
tg = FlatBufTaskGraph.TaskGraph.TaskGraph.GetRootAs(buf, 0)

In [5]:
# Build {ring size: [jump sequence, ...]} from every ring stored in the task graph.
# A later ring with the same size replaces the earlier entry.
rings = {}
for ring_idx in range(tg.RingsLength()):
    ring = tg.Rings(ring_idx)
    ring_size = ring.Ringsz()
    jump_sequences = []
    for path_idx in range(ring.RingpathsLength()):
        jump_sequences.append(list(ring.Ringpaths(path_idx).JumpsAsNumpy()))
    rings[ring_size] = jump_sequences

In [6]:
# Show the parsed table: ring size -> list of jump sequences.
rings

{6: [[1, 1], [11, 11]],
 4: [[11, 5, 11], [1, 7, 1]],
 3: [[5, 11], [7, 1]],
 2: [[5, 1], [7, 11]],
 12: [[5], [7], [11], [1]]}

In [7]:
# Group the rows of mac.csv by machine index (first column); each row contributes
# one (second column, third column) pair with surrounding whitespace stripped.
nic_mac = {machine_id: [] for machine_id in range(12)}
with open("mac.csv", "r") as mac_file:
    for row in mac_file.readlines():
        fields = row.split(",")
        bucket = nic_mac[int(fields[0])]
        bucket.append((fields[1].strip(), fields[2].strip()))

In [8]:
# Show the per-machine list of (interface name, MAC address) pairs read from mac.csv.
nic_mac

{0: [('enp65s0f0', '14:02:ec:ca:e4:dc'),
  ('enp65s0f1', '14:02:ec:ca:e4:dd'),
  ('enp65s0f2', '14:02:ec:ca:e4:de'),
  ('enp65s0f3', '14:02:ec:ca:e4:df'),
  ('enp65s0f4', '14:02:ec:ca:e4:e0'),
  ('enp65s0f5', '14:02:ec:ca:e4:e1'),
  ('enp65s0f6', '14:02:ec:ca:e4:e2'),
  ('enp65s0f7', '14:02:ec:ca:e4:e3'),
  ('enp1s0f0', '14:02:ec:ca:e7:e8'),
  ('enp1s0f1', '14:02:ec:ca:e7:e9'),
  ('enp1s0f2', '14:02:ec:ca:e7:ea'),
  ('enp1s0f3', '14:02:ec:ca:e7:eb'),
  ('enp1s0f4', '14:02:ec:ca:e7:ec'),
  ('enp1s0f5', '14:02:ec:ca:e7:ed'),
  ('enp1s0f6', '14:02:ec:ca:e7:ee'),
  ('enp1s0f7', '14:02:ec:ca:e7:ef')],
 1: [('enp65s0f0', '14:02:ec:ca:e8:88'),
  ('enp65s0f1', '14:02:ec:ca:e8:89'),
  ('enp65s0f2', '14:02:ec:ca:e8:8a'),
  ('enp65s0f3', '14:02:ec:ca:e8:8b'),
  ('enp65s0f4', '14:02:ec:ca:e8:8c'),
  ('enp65s0f5', '14:02:ec:ca:e8:8d'),
  ('enp65s0f6', '14:02:ec:ca:e8:8e'),
  ('enp65s0f7', '14:02:ec:ca:e8:8f'),
  ('enp1s0f0', '14:02:ec:ca:e4:b4'),
  ('enp1s0f1', '14:02:ec:ca:e4:b5'),
  ('enp1s0f2', 

In [9]:
def generate_netplan(machine, ifnames):
    """Write the netplan file ``01-patchpanel-ifconfig_<machine>.yaml``.

    One ``ethernets`` entry is emitted per interface, matched by MAC address,
    with DHCPv4 disabled and MTU 9000.

    The last character of the interface name is read as a decimal digit
    (``enp65s0f3`` -> 3) and the fourth character selects the numbering offset
    (``"6"`` as in ``enp65...`` versus anything else, e.g. ``enp1...``):

    * digit 0-3: renamed ``pp<N>rdma`` and given ``10.100.<machine>.<N>/24``,
      where N is digit+1 for the ``"6"`` names and digit+5 otherwise;
    * digit 4-7: renamed ``pp<N>eth`` with no address, where N is digit-3 for
      the ``"6"`` names and digit+1 otherwise.

    Args:
        machine: integer machine index; used in the file name and the third
            octet of the addresses.
        ifnames: iterable of indexable entries whose element 0 is the interface
            name and element 1 is its MAC address string.
    """
    with open("01-patchpanel-ifconfig_{:d}.yaml".format(machine), "w") as f:
        f.write("network:\n")
        f.write("  version: 2\n")
        f.write("  ethernets:\n")
        for interface_info in ifnames:
            ifname = interface_info[0]
            mac = interface_info[1]
            func_idx = int(ifname[-1])
            is_enp6x = ifname[3] == "6"

            f.write("    {:s}:\n".format(ifname))
            f.write("      match:\n")
            f.write("        macaddress: {:s}\n".format(mac))
            f.write("      dhcp4: false\n")
            if func_idx < 4:
                port_no = func_idx + 1 if is_enp6x else func_idx + 5
                f.write("      addresses: [10.100.{:d}.{:d}/24]\n".format(machine, port_no))
                f.write("      set-name: pp{:d}rdma\n".format(port_no))
            else:
                port_no = func_idx - 3 if is_enp6x else func_idx + 1
                f.write("      set-name: pp{:d}eth\n".format(port_no))
            f.write("      mtu: 9000\n")

In [10]:
# Emit one netplan file per machine index 0-11, using that machine's entries in nic_mac.
for i in range(12): 
    generate_netplan(i, nic_mac[i])

In [11]:
def generate_pp_allocation(ring_desc):
    """Assign a port to every link between the 12 machines.

    The distinct jump values found anywhere in ``ring_desc`` are the hop
    distances each machine links over. Machines are visited in increasing
    order; for each hop still pending on the current machine, the next free
    port on both ends is taken and the mirrored hop (``12 - h``) is crossed
    off the peer's pending set. The walk stops once every machine holds
    exactly four entries.

    Args:
        ring_desc: dict whose values are lists of jump sequences.

    Returns:
        dict machine -> list of (local_port, remote_machine, remote_port).
    """
    distinct_hops = set()
    for paths in ring_desc.values():
        for path in paths:
            distinct_hops.update(path)
    hops = sorted(list(distinct_hops))

    pp_allocation = {machine: [] for machine in range(12)}
    # Each machine gets its own pending-hop set; pending sets are iterated in
    # native set order, which determines the port numbering.
    pp_hop_to_alloc = {machine: set(hops) for machine in range(12)}

    curr = 0
    while any(len(links) != 4 for links in pp_allocation.values()):
        for h in pp_hop_to_alloc[curr]:
            remote_machine = (curr + h) % 12
            local_port = len(pp_allocation[curr])
            remote_port = len(pp_allocation[remote_machine])
            pp_allocation[curr].append((local_port, remote_machine, remote_port))
            pp_allocation[remote_machine].append((remote_port, curr, local_port))
            # The peer must not allocate this same link again from its side.
            pp_hop_to_alloc[remote_machine].remove(12 - h)
        curr += 1
    return pp_allocation

In [12]:
# Port assignment for every link, derived from the hop distances present in `rings`.
pp_allocation = generate_pp_allocation(rings)

In [13]:
def get_all_hop_rule(ring_desc):
    """Map each total distance (sum of jumps modulo 12) to a jump sequence.

    Sequences are visited in the iteration order of ``ring_desc``; when two
    sequences add up to the same distance, the later one replaces the earlier.
    The returned values are the very list objects held by ``ring_desc``.
    """
    # this is more of a hack for now, really should make all coin change to output this rule
    rule_by_distance = {}
    for paths in ring_desc.values():
        for path in paths:
            rule_by_distance[sum(path) % 12] = path
    return rule_by_distance

In [14]:
# Derive the distance -> jump-sequence table from the rings, print it as computed,
# then patch it by hand.
hop_rule = get_all_hop_rule(rings)
print(hop_rule)
# Replace the computed sequences for distances 3, 9 and 8 with reordered jumps
# (same jump values, different order).
hop_rule[3] = [5, 11, 11]
hop_rule[9] = [1, 1, 7]
hop_rule[8] = [1, 7]
# Extra entry under the key -6: the find_*_if helpers and generate_tc_rules look
# up -6 instead of 6 when dst - src is exactly -6.
hop_rule[-6] = [1, 5]

{2: [1, 1], 10: [11, 11], 3: [11, 5, 11], 9: [1, 7, 1], 4: [5, 11], 8: [7, 1], 6: [7, 11], 5: [5], 7: [7], 11: [11], 1: [1]}


In [20]:
# Index port_map.csv by (first column, second column) as integers; the value is
# the pair (third column, fourth column) with surrounding whitespace stripped.
machine_port_mac = {}
with open("port_map.csv", "r") as port_map_file:
    for row in port_map_file.readlines():
        fields = row.split(",")
        mac_pair = (fields[2].strip(), fields[3].strip())
        machine_port_mac[(int(fields[0]), int(fields[1]))] = mac_pair

In [21]:
def find_incoming_if(src, dst, pp_allocation, hop_rule):
    """Return the port on ``dst`` at which the path from ``src`` arrives.

    The jump sequence ``hop_rule[hops]`` is walked starting at ``src``. When a
    jump lands on ``dst``, the current node's entries in ``pp_allocation`` are
    searched for the link to ``dst`` and that entry's remote port is returned.

    Args:
        src: starting machine index.
        dst: destination machine index.
        pp_allocation: dict machine -> list of
            (local_port, remote_machine, remote_port) tuples.
        hop_rule: dict distance -> list of jumps. The key -6 is used instead
            of 6 when ``dst - src`` is exactly -6.

    Returns:
        The remote port of the link that reaches ``dst``.

    Raises:
        Exception: if the walk finishes without finding a link into ``dst``.
        KeyError: if ``hop_rule`` has no entry for the distance.
    """
    hops = (dst - src) % 12
    if hops == 6 and dst - src == -6:
        hops = -6
    curr = src
#     print("sss", src, dst, hops)
    # Iterate over exactly the jumps in the rule so that a failed lookup falls
    # through to the explicit error below rather than indexing past the end.
    for jump in hop_rule[hops]:
        print(curr, hops, jump)
        landing = (curr + jump) % 12
        if landing == dst:
            for sp, dm, dp in pp_allocation[curr]:
                if dm == dst:
                    return dp
            # No direct link recorded from curr to dst: stay on curr and try
            # the next jump.
        else:
            curr = landing
    raise Exception('you fool!')

In [22]:
def find_outgoing_if(src, dst, pp_allocation, hop_rule):
    """Return the local port on ``src`` used for the first jump towards ``dst``.

    Only the first element of ``hop_rule[hops]`` is consulted; ``src``'s
    entries in ``pp_allocation`` are then searched for the link to that
    neighbour. The key -6 is looked up instead of 6 when ``dst - src`` is
    exactly -6. Raises ``Exception`` when no such link exists.
    """
    delta = dst - src
    hops = -6 if delta == -6 else delta % 12
    next_hop = hop_rule[hops][0]
    print("ggg", src, dst, hops, next_hop)
    neighbour = (src + next_hop) % 12
    for local_port, remote_machine, _remote_port in pp_allocation[src]:
        if remote_machine == neighbour:
            return local_port
    raise Exception('you fool!')

In [23]:
def generate_tc_rules(machine_port_mac, pp_allocation, hop_rule):
    """Write a ``tc_<m>.sh`` shell script for each of the 12 machines.

    Each script contains, in order:

    1. commands that delete and re-add the ingress qdisc on pp1eth..pp4eth;
    2. ``tc filter`` forwarding rules (an ``ip`` and an ``802.1Q`` variant) for
       every (src, dst) pair whose ``hop_rule`` path visits this machine,
       de-duplicated and written in sorted order so the file is reproducible;
    3. per-destination ``arp`` / ``ip r`` commands for traffic that starts here.

    Args:
        machine_port_mac: dict keyed by (machine, port); each value is an
            indexable pair of MAC strings. Element 0 is used when the next hop
            is the final destination, element 1 otherwise.
        pp_allocation: dict machine -> list of
            (local_port, remote_machine, remote_port) tuples.
        hop_rule: dict distance -> list of jumps; must include the key -6,
            which is looked up whenever the index difference is exactly -6.
    """
    for m in range(12):
        with open("tc_{:d}.sh".format(m), "w") as f:
            f.write("#!/bin/bash\n\n")
            # Reset the ingress qdisc on each of the four pp<N>eth devices.
            for i in range(4):
                f.write("tc qdisc del dev pp{:d}eth parent ffff: >/dev/null 2>&1\n".format(i+1))
                f.write("tc qdisc add dev pp{:d}eth ingress\n".format(i+1))

            # --- forwarding rules for paths that pass through machine m ---
            rules = set()
            for src in range(12):
                for dst in range(12):
                    if src == m or dst == m or src == dst:
                        continue
                    src_to_dst_hop = (dst - src) % 12
                    if src_to_dst_hop == 6 and dst - src == -6:
                        src_to_dst_hop = -6
                    src_to_dst_links = hop_rule[src_to_dst_hop]
                    # Every node reached along the path, the last one being dst.
                    node_on_the_way = [(src + i) % 12 for i in np.cumsum(src_to_dst_links)]
#                     print(src, dst, m, src_to_dst_links, node_on_the_way)
                    if m not in node_on_the_way:
                        continue
                    me_to_dst_hop = (dst - m) % 12
                    if me_to_dst_hop == 6 and dst - m == -6:
                        me_to_dst_hop = -6
                    src_incoming_if = find_incoming_if(src, m, pp_allocation, hop_rule)
                    next_hop_id = (m + hop_rule[me_to_dst_hop][0]) % 12
                    me_to_next_hop_dist = len(hop_rule[me_to_dst_hop])
                    dst_outgoing_if = find_outgoing_if(m, dst, pp_allocation, hop_rule)
#                     print(m ,src, dst, me_to_next_hop_dist, next_hop_id)
                    # NOTE(review): this lookup starts from src, whereas the
                    # per-destination section below starts from m - confirm
                    # that src is what is intended here.
                    next_hop_if = find_incoming_if(src, next_hop_id, pp_allocation, hop_rule)

                    # Reverse-path values only feed the disabled source-MAC
                    # rewrite below; they are still computed so the debug
                    # trace and failure behaviour stay as they were.
                    dst_to_src_hop = (src - dst) % 12
                    dst_to_src_links = hop_rule[dst_to_src_hop]
                    rev_node_on_the_way = [(dst + i) % 12 for i in np.cumsum(dst_to_src_links)]
                    rev_first_hop = rev_node_on_the_way[0]
                    rev_first_hop_if = find_incoming_if(dst, rev_first_hop, pp_allocation, hop_rule)

                    next_hop_mac = machine_port_mac[next_hop_id, next_hop_if][0 if me_to_next_hop_dist == 1 else 1]
                    # Disabled action, kept for reference:
                    #   "action pedit ex munge eth src set {:s} pipe ".format(machine_port_mac[rev_first_hop, rev_first_hop_if][1])
                    actions = "action pedit ex munge eth dst set {:s} pipe ".format(next_hop_mac) \
                        + "action mirred egress redirect dev pp{:d}rdma\n".format(dst_outgoing_if+1)

                    rules.add("tc filter add dev pp{:d}eth prio 0 protocol ip parent ffff: flower skip_hw ".format(src_incoming_if+1)
                              + " dst_ip 10.100.{:d}.0/24 ".format(dst)
                              + actions)
                    rules.add("tc filter add dev pp{:d}eth prio 0 protocol 802.1Q parent ffff: flower skip_hw ".format(src_incoming_if+1)
                              + "vlan_ethtype ip dst_ip 10.100.{:d}.0/24 ".format(dst)
                              + actions)
            # Sorted so repeated runs produce byte-identical scripts.
            for r in sorted(rules):
                f.write(r)

            # --- static ARP entries and host routes for traffic starting at m ---
            for dst in range(12):
                if dst == m:
                    continue
                me_to_dst_hop = (dst - m) % 12
                if me_to_dst_hop == 6 and dst - m == -6:
                    me_to_dst_hop = -6
                next_hop_id = (m + hop_rule[me_to_dst_hop][0]) % 12
                me_to_next_hop_dist = len(hop_rule[me_to_dst_hop])
                dst_outgoing_if = find_outgoing_if(m, dst, pp_allocation, hop_rule)
                print(m, dst)
                dst_incoming_if = find_incoming_if(m, dst, pp_allocation, hop_rule)
                next_hop_if = find_incoming_if(m, next_hop_id, pp_allocation, hop_rule)
                print("fff", m, dst, next_hop_id, me_to_next_hop_dist, dst_outgoing_if, dst_incoming_if, next_hop_if)
                f.write("arp -d 10.100.{:d}.{:d} > /dev/null 2>&1\n".format(dst, dst_incoming_if+1))
                f.write("ip r d 10.100.{:d}.{:d}/32 > /dev/null 2>&1\n".format(dst, dst_incoming_if+1))
                # Element 0 when the next hop is dst itself, element 1 otherwise.
                next_hop_mac = machine_port_mac[next_hop_id, next_hop_if][0 if me_to_next_hop_dist == 1 else 1]
                f.write("arp -s 10.100.{:d}.{:d} {:s} -i pp{:d}rdma\n".format(dst, dst_incoming_if+1, next_hop_mac, dst_outgoing_if+1))
                f.write("ip r a 10.100.{:d}.{:d}/32 src 10.100.{:d}.{:d} dev pp{:d}rdma\n".format(dst, dst_incoming_if+1, m, dst_outgoing_if+1, dst_outgoing_if+1))

In [24]:
# Write tc_0.sh .. tc_11.sh; the helper functions print a debug trace as they run.
generate_tc_rules(machine_port_mac, pp_allocation, hop_rule)

1 11 11
ggg 0 11 11 11
1 10 11
0 10 11
11 1 1
5 7 7
ggg 0 11 11 11
5 6 7
0 6 11
11 7 7
7 5 5
ggg 0 10 10 11
7 4 5
0 4 11
10 1 1
7 5 5
ggg 0 11 11 11
7 4 5
0 4 11
11 1 1
8 4 5
1 4 11
ggg 0 11 11 11
8 3 5
1 3 11
0 3 11
11 1 1
10 2 1
11 2 1
ggg 0 7 7 7
10 9 1
11 9 1
0 9 7
7 5 5
11 1 1
ggg 0 1 1 1
11 2 1
0 2 1
1 11 11
11 1 1
ggg 0 5 5 5
11 -6 1
0 -6 5
5 7 7
11 1 1
ggg 0 7 7 7
11 8 1
0 8 7
7 5 5
11 1 1
ggg 0 8 8 1
11 2 1
0 2 1
8 5 5
ggg 0 1 1 1
0 1
0 1 1
0 1 1
fff 0 1 1 1 0 0 0
ggg 0 2 2 1
0 2
0 2 1
1 2 1
0 1 1
fff 0 2 1 2 0 0 0
ggg 0 3 3 5
0 3
0 3 5
5 3 11
4 3 11
0 5 5
fff 0 3 5 3 2 1 0
ggg 0 4 4 5
0 4
0 4 5
5 4 11
0 5 5
fff 0 4 5 2 2 1 0
ggg 0 5 5 5
0 5
0 5 5
0 5 5
fff 0 5 5 1 2 0 0
ggg 0 6 6 7
0 6
0 6 7
7 6 11
0 7 7
fff 0 6 7 2 3 2 0
ggg 0 7 7 7
0 7
0 7 7
0 7 7
fff 0 7 7 1 3 0 0
ggg 0 8 8 1
0 8
0 8 1
1 8 7
0 1 1
fff 0 8 1 2 0 0 0
ggg 0 9 9 1
0 9
0 9 1
1 9 1
2 9 7
0 1 1
fff 0 9 1 3 0 0 0
ggg 0 10 10 11
0 10
0 10 11
11 10 11
0 11 11
fff 0 10 11 2 1 3 0
ggg 0 11 11 11
0 11
0 11 11
0 11 11
f

In [210]:
def generate_pp_reconf_entries(pp_allocation, hop_rule):
    """Print a ``connect <port> <port>`` line, followed by ``yes``, per link.

    Ports are labelled with a letter for the machine (0 -> ``A``, 1 -> ``B``,
    ...) followed by the 1-based port number. A link is printed only the first
    time one of its endpoints is met as a local port; both endpoints are then
    remembered so the link is not repeated from the other side.
    """
    seen_ports = set()
    for src in range(12):
        for link in pp_allocation[src]:
            local_label = chr(src + 0x41) + str(1 + link[0])
            if local_label in seen_ports:
                continue
            seen_ports.add(local_label)
            peer = link[1]
            peer_port = find_incoming_if(src, peer, pp_allocation, hop_rule)
            peer_label = chr(peer + 0x41) + str(1 + peer_port)
            seen_ports.add(peer_label)
            print("connect {:s} {:s}".format(local_label, peer_label))
            print("yes")

In [149]:
# Spot check: local port machine 1 uses for its first jump towards machine 0.
find_outgoing_if(1, 0, pp_allocation, hop_rule)

0

In [259]:
def generate_nccl_ifmap(pp_allocation, hop_rule):
    """Write ``if_map_<i>.txt`` for each machine i in 0..11.

    Every other machine j contributes one line
    ``<outgoing port on i>, <j>, <incoming port on j>``, obtained from
    find_outgoing_if and find_incoming_if in that order.
    """
    for i in range(12):
        with open("if_map_{:d}.txt".format(i), "w") as f:
            for j in range(12):
                if i == j:
                    continue
                out_if = find_outgoing_if(i, j, pp_allocation, hop_rule)
                in_if = find_incoming_if(i, j, pp_allocation, hop_rule)
                f.write("{:d}, {:d}, {:d}\n".format(out_if, j, in_if))

In [260]:
# Print the "connect"/"yes" command pairs for every allocated link.
generate_pp_reconf_entries(pp_allocation, hop_rule)

0 1 1
connect A1 B1
yes
0 11 11
connect A2 L1
yes
0 5 5
connect A3 F1
yes
0 7 7
connect A4 H1
yes
1 1 1
connect B2 C1
yes
1 5 5
connect B3 G1
yes
1 7 7
connect B4 I1
yes
2 1 1
connect C2 D1
yes
2 5 5
connect C3 H2
yes
2 7 7
connect C4 J1
yes
3 1 1
connect D2 E1
yes
3 5 5
connect D3 I2
yes
3 7 7
connect D4 K1
yes
4 1 1
connect E2 F2
yes
4 5 5
connect E3 J2
yes
4 7 7
connect E4 L2
yes
5 1 1
connect F3 G2
yes
5 5 5
connect F4 K2
yes
6 1 1
connect G3 H3
yes
6 5 5
connect G4 L3
yes
7 1 1
connect H4 I3
yes
8 1 1
connect I4 J3
yes
9 1 1
connect J4 K3
yes
10 1 1
connect K4 L4
yes


In [102]:
# Show the allocation: machine -> [(local_port, remote_machine, remote_port), ...].
pp_allocation

{0: [(0, 1, 0), (1, 11, 0), (2, 5, 0), (3, 7, 0)],
 1: [(0, 0, 0), (1, 2, 0), (2, 6, 0), (3, 8, 0)],
 2: [(0, 1, 1), (1, 3, 0), (2, 7, 1), (3, 9, 0)],
 3: [(0, 2, 1), (1, 4, 0), (2, 8, 1), (3, 10, 0)],
 4: [(0, 3, 1), (1, 5, 1), (2, 9, 1), (3, 11, 1)],
 5: [(0, 0, 2), (1, 4, 1), (2, 6, 1), (3, 10, 1)],
 6: [(0, 1, 2), (1, 5, 2), (2, 7, 2), (3, 11, 2)],
 7: [(0, 0, 3), (1, 2, 2), (2, 6, 2), (3, 8, 2)],
 8: [(0, 1, 3), (1, 3, 2), (2, 7, 3), (3, 9, 2)],
 9: [(0, 2, 3), (1, 4, 2), (2, 8, 3), (3, 10, 2)],
 10: [(0, 3, 3), (1, 5, 3), (2, 9, 3), (3, 11, 3)],
 11: [(0, 0, 1), (1, 4, 3), (2, 6, 3), (3, 10, 3)]}

In [261]:
# Write if_map_0.txt .. if_map_11.txt; the helper functions print a debug trace.
generate_nccl_ifmap(pp_allocation, hop_rule)

ggg 0 1 1 1
0 1 1
ggg 0 2 2 1
0 2 1
1 2 1
ggg 0 3 3 5
0 3 5
5 3 11
4 3 11
ggg 0 4 4 5
0 4 5
5 4 11
ggg 0 5 5 5
0 5 5
ggg 0 6 6 7
0 6 7
7 6 11
ggg 0 7 7 7
0 7 7
ggg 0 8 8 1
0 8 1
1 8 7
ggg 0 9 9 1
0 9 1
1 9 1
2 9 7
ggg 0 10 10 11
0 10 11
11 10 11
ggg 0 11 11 11
0 11 11
ggg 1 0 11 11
1 11 11
ggg 1 2 1 1
1 1 1
ggg 1 3 2 1
1 2 1
2 2 1
ggg 1 4 3 5
1 3 5
6 3 11
5 3 11
ggg 1 5 4 5
1 4 5
6 4 11
ggg 1 6 5 5
1 5 5
ggg 1 7 6 7
1 6 7
8 6 11
ggg 1 8 7 7
1 7 7
ggg 1 9 8 1
1 8 1
2 8 7
ggg 1 10 9 1
1 9 1
2 9 1
3 9 7
ggg 1 11 10 11
1 10 11
0 10 11
ggg 2 0 10 11
2 10 11
1 10 11
ggg 2 1 11 11
2 11 11
ggg 2 3 1 1
2 1 1
ggg 2 4 2 1
2 2 1
3 2 1
ggg 2 5 3 5
2 3 5
7 3 11
6 3 11
ggg 2 6 4 5
2 4 5
7 4 11
ggg 2 7 5 5
2 5 5
ggg 2 8 6 7
2 6 7
9 6 11
ggg 2 9 7 7
2 7 7
ggg 2 10 8 1
2 8 1
3 8 7
ggg 2 11 9 1
2 9 1
3 9 1
4 9 7
ggg 3 0 9 1
3 9 1
4 9 1
5 9 7
ggg 3 1 10 11
3 10 11
2 10 11
ggg 3 2 11 11
3 11 11
ggg 3 4 1 1
3 1 1
ggg 3 5 2 1
3 2 1
4 2 1
ggg 3 6 3 5
3 3 5
8 3 11
7 3 11
ggg 3 7 4 5
3 4 5
8 4 11
ggg 3 8 5 5
3 

In [34]:
# Spot check: port on machine 4 at which the path from machine 0 arrives.
find_incoming_if(0, 4, pp_allocation, hop_rule)

0 4 5
5 4 11


1

In [35]:
# Scratch check of the path expansion used inside generate_tc_rules:
# list the nodes reached when following the jump sequence from 0 to 8.
src, dst = 0, 8
src_to_dst_hop = (dst - src) % 12
src_to_dst_links = hop_rule[src_to_dst_hop]
node_on_the_way = [(src + i) % 12 for i in np.cumsum(src_to_dst_links)]

In [36]:
# Nodes visited from src to dst, ending with dst itself.
node_on_the_way

[1, 8]

In [246]:
# Python's % maps -6 to 6, so a difference of -6 and +6 collide; this is why the
# routing helpers special-case dst - src == -6 and look up hop_rule[-6].
-6 % 12

6