In [1]:
import numpy as np
import pandas as pd
import itertools
from copy import deepcopy
from tqdm.notebook import tqdm
from pathlib import Path

# Device identifier string. Not referenced by any cell visible in this notebook —
# NOTE(review): presumably consumed by model code elsewhere; confirm or remove.
GPU = "cuda:0"

In [2]:
# rootdir is the parent of the (resolved) current working directory, so these
# paths depend on where the kernel was started.
rootdir = Path().resolve().parent
inputdir = rootdir.joinpath("data", "predict-ai-model-runtime")

In [3]:
# Build one DataFrame per split ("train" / "valid" / "test") listing every
# layout .npz file found under inputdir, for each (arch, perm) combination.
dataset_dict = {}
ignores = []  # only filled by the disabled filter kept below
for ds in ["train", "valid", "test"]:
    records = []
    for arch, perm in itertools.product(["nlp", "xla"], ["default", "random"]):
        datadir = inputdir / f"npz_all/npz/layout/{arch}/{perm}/{ds}"
        # sorted() keeps the row order deterministic across runs.
        for filepath in sorted(datadir.glob("*.npz")):
            # Path.stem drops the directory and the trailing ".npz" without
            # depending on the OS path separator.
            filename = filepath.stem

            # if (ds != "test") and (("mlperf" in filename) or ("openai" in filename)):
            #     ignores.append(filepath)
            #     continue
            records.append(
                {
                    "arch": arch,
                    "perm": perm,
                    "filename": filename,
                    "filepath": filepath,
                }
            )
    # Explicit columns keep the schema stable even when a split has no files.
    dataset_dict[ds] = pd.DataFrame(
        records, columns=["arch", "perm", "filename", "filepath"]
    )

In [4]:
# Load the arrays of the first training file for interactive exploration.
row = dataset_dict["train"].iloc[0]
# np.load on an .npz returns a lazily-read archive that holds an open file
# handle; the with-block closes it once the arrays have been materialised.
# Read any additional arrays inside this block.
with np.load(row["filepath"]) as fileobj:
    node_feat = fileobj["node_feat"]
    edge_index = fileobj["edge_index"]
    node_config_ids = fileobj["node_config_ids"]
    node_splits = fileobj["node_splits"]

# True for every edge row in which at least one of the entries is listed in
# node_config_ids.
indice = np.isin(edge_index, node_config_ids).any(axis=1)
# edge_index = edge_index[indice]

In [5]:
# Display the four arrays loaded from the first training file as a tuple.
node_feat, edge_index, node_config_ids, node_splits

(array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 array([[   2,    0],
        [   2,    1],
        [   5,    3],
        ...,
        [1695, 1684],
        [1695, 1686],
        [1695, 1694]]),
 array([ 393,  403,  405,  411,  416,  426,  429,  433,  434,  438,  454,
         462,  468,  473,  478,  485,  506,  510,  513,  541,  560,  584,
         586,  589,  599,  601,  603,  621,  632,  650,  652,  655,  665,
         667,  669,  687,  698,  716,  718,  721,  731,  733,  735,  753,
         764,  782,  784,  787,  797,  799,  801,  819,  830,  848,  850,
         853,  863,  865,  867,  885,  896,  914,  916,  919,  929,  931,
         933,  951,  962,  980,  982,  985,  995,  997,  999, 1017, 1028,
        1046, 1048, 1051, 1061, 1063, 1065, 1083, 1094, 1112, 1114, 111

In [7]:
# Boolean mask over edge rows: True where the first-column node id is one of
# node_config_ids. The cell then displays only those rows.
indice = np.isin(edge_index[..., 0], node_config_ids)

edge_index[indice]

array([[ 393,  392],
       [ 403,  402],
       [ 405,  404],
       [ 411,  410],
       [ 416,  415],
       [ 426,  424],
       [ 426,  425],
       [ 429,  426],
       [ 433,  412],
       [ 434,  408],
       [ 438,  428],
       [ 454,  453],
       [ 462,  444],
       [ 468,  466],
       [ 468,  467],
       [ 473,  471],
       [ 473,  472],
       [ 478,  471],
       [ 478,  477],
       [ 485,  476],
       [ 485,  484],
       [ 506,  471],
       [ 506,  505],
       [ 510,  504],
       [ 510,  509],
       [ 513,  511],
       [ 513,  512],
       [ 541,  539],
       [ 541,  540],
       [ 560,  558],
       [ 560,  559],
       [ 584,  472],
       [ 584,  583],
       [ 586,  477],
       [ 586,  583],
       [ 589,  585],
       [ 589,  588],
       [ 599,  505],
       [ 599,  583],
       [ 601,  598],
       [ 601,  600],
       [ 603,  512],
       [ 603,  602],
       [ 621,  540],
       [ 621,  620],
       [ 632,  559],
       [ 632,  631],
       [ 650,

In [36]:
# Adjacency list keyed by node index: one empty list per row of node_feat,
# then edge[1] is appended under edge[0] in edge_index order.
graph = dict((node_id, []) for node_id in range(len(node_feat)))
for edge in edge_index:
    src, dst = edge[0], edge[1]
    graph[src].append(dst)

In [37]:
def dfs_all_paths(graph, node, visited, path, i_all_paths):
    """
    Collect every path from ``node`` to a node without outgoing edges using
    depth-first search.

    The traversal is iterative (explicit stack of neighbor iterators), so the
    path length is not bounded by Python's recursion limit. Paths are emitted
    in the same order as a recursive DFS that follows ``graph[node]`` in list
    order.

    Parameters:
    - graph: directed graph as an adjacency list (mapping node -> list of neighbors)
    - node: node to start the search from
    - visited: mapping node -> bool holding the visit state of each node;
      mutated during the search and reset to False for every node the search
      entered (including ``node``) before returning
    - path: list holding the current search path; nodes are pushed/popped
      during the search and the list is restored to its input content on return
    - i_all_paths: output list; a copy of the current path is appended each
      time a node with no outgoing edges is reached

    Returns:
    - None; results are delivered through ``i_all_paths``.
    """
    visited[node] = True
    path.append(node)

    # Reached a goal node (no outgoing edges): store a copy of the current path.
    if not graph[node]:
        i_all_paths.append(path.copy())

    # One iterator per node on the current path; each frame corresponds to
    # exactly one node appended to `path` by this call.
    stack = [iter(graph[node])]
    while stack:
        for neighbor in stack[-1]:
            if not visited[neighbor]:
                visited[neighbor] = True
                path.append(neighbor)
                if not graph[neighbor]:
                    i_all_paths.append(path.copy())
                # Descend: continue with the neighbor's own successors.
                stack.append(iter(graph[neighbor]))
                break
        else:
            # All neighbors handled: backtrack by removing the node from the
            # search path and marking it unvisited again.
            stack.pop()
            finished = path.pop()
            visited[finished] = False

In [39]:
# The path enumeration below starts once from every id in node_config_ids.
start_nodes = node_config_ids

In [40]:
# For each start node, enumerate every path found by dfs_all_paths and store
# the per-start-node list of paths at the same position in all_paths.
all_paths = []
for i_start, start_node in enumerate(start_nodes):
    print(i_start)
    # Fresh result list and visit map per start node; since the list is newly
    # created each iteration, it can be stored directly without copying.
    i_all_paths = []
    visited = dict.fromkeys(graph, False)

    dfs_all_paths(graph, start_node, visited, [], i_all_paths)
    all_paths.append(i_all_paths)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120


In [41]:
# Flatten the per-start-node lists into one list of paths. chain.from_iterable
# is linear, whereas sum(list_of_lists, []) re-copies the accumulator each step.
list(itertools.chain.from_iterable(all_paths))

[[393, 392],
 [403, 402],
 [405, 404, 403, 402],
 [411, 410],
 [416, 415],
 [426, 424],
 [426, 425],
 [429, 426, 424],
 [429, 426, 425],
 [433, 412, 411, 410],
 [434, 408],
 [438, 428],
 [454, 453],
 [462, 444],
 [468, 466],
 [468, 467],
 [473, 471, 468, 466],
 [473, 471, 468, 467],
 [473, 472],
 [478, 471, 468, 466],
 [478, 471, 468, 467],
 [478, 477],
 [485, 476, 473, 471, 468, 466],
 [485, 476, 473, 471, 468, 467],
 [485, 476, 473, 472],
 [485, 484],
 [506, 471, 468, 466],
 [506, 471, 468, 467],
 [506, 505],
 [510, 504],
 [510, 509, 506, 471, 468, 466],
 [510, 509, 506, 471, 468, 467],
 [510, 509, 506, 505],
 [513, 511, 510, 504],
 [513, 511, 510, 509, 506, 471, 468, 466],
 [513, 511, 510, 509, 506, 471, 468, 467],
 [513, 511, 510, 509, 506, 505],
 [513, 512],
 [541, 539],
 [541, 540],
 [560, 558],
 [560, 559],
 [584, 472],
 [584, 583],
 [586, 477],
 [586, 583],
 [589, 585, 584, 472],
 [589, 585, 584, 583],
 [589, 588],
 [599, 505],
 [599, 583],
 [601, 598],
 [601, 600, 599, 505],
 

In [22]:
# Print each entry of all_paths on its own line.
# NOTE(review): this cell's execution count (22) is lower than that of the cells
# building all_paths above, and its saved output looks like raw edge pairs —
# presumably stale; re-run after a kernel restart to confirm.
for path in all_paths:
    print(path)

[2, 0]
[2, 1]
