In [None]:
# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

# SPDX-License-Identifier: Apache-2.0

#

# Licensed under the Apache License, Version 2.0 (the "License");

# you may not use this file except in compliance with the License.

# You may obtain a copy of the License at

#

# http://www.apache.org/licenses/LICENSE-2.0

#

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

In [1]:
### This notebook loads the DGL graphs and preprocesses them the same way data_graph.py does, so the model can read them
import os, sys
ROOT_DIR = '/raid/andlai/2024_ICCAD_Contest_Gate_Sizing_Benchmark'
sys.path.append(ROOT_DIR)
import time

import pickle as pk

import numpy as np
import pandas as pd

import dgl
import networkx as nx
# import graph_tool as gt
# from graph_tool.all import *

import torch

import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
## Designs whose graphs are loaded from the dataset directory below
design_names = [
    'NV_NVDLA_partition_m',
    'NV_NVDLA_partition_p',
    'ariane136',
    'mempool_tile_wrap',
]
dataset_dir = '0709_v1'

# gs maps each design name to the first element returned by dgl.load_graphs
# for that design's graph.dgl file (iterated later as a sequence of graphs)
gs = {}
for design_name in design_names:
    design_dir = f'{ROOT_DIR}/datasets/{dataset_dir}/{design_name}'
    loaded = dgl.load_graphs(f'{design_dir}/graph.dgl')
    gs[design_name] = loaded[0]

# identical function taken from data_graph.py
def gen_topo(g_hetero):
    """Compute the topological levels of the 'net_out' + 'cell_out' edges.

    A plain graph is built on the CPU from the union of the two edge types
    of ``g_hetero`` and split into topological levels. Each level is checked
    against column 1 of the 'nf' node feature: even levels must contain no
    node whose value is 0, odd levels no node whose value is 1, and the
    number of levels must be even.

    Args:
        g_hetero: graph exposing 'net_out' and 'cell_out' edge types and an
            'nf' node feature with at least two columns.

    Returns:
        A pair ``(levels, elapsed)`` where ``levels`` is a list with one
        CUDA tensor of node ids per topological level and ``elapsed`` is the
        wall-clock time in seconds, measured between two CUDA synchronisations.

    Raises:
        AssertionError: if any of the level checks described above fails.
    """
    torch.cuda.synchronize()
    time_s = time.time()

    net_src, net_dst = g_hetero.edges(etype='net_out', form='uv')
    cell_src, cell_dst = g_hetero.edges(etype='cell_out', form='uv')
    all_src = torch.cat([net_src, cell_src]).cpu()
    all_dst = torch.cat([net_dst, cell_dst]).cpu()
    g = dgl.graph((all_src, all_dst))
    topo = dgl.topological_nodes_generator(g)

    ### inspect the topology: levels must alternate between the two pin kinds
    g.ndata['fanout'] = g_hetero.ndata['nf'][:, 1].cpu()
    for li, nodes in enumerate(topo):
        level_flags = g.ndata['fanout'][nodes]
        if li % 2 == 0:
            # even level: no node may carry the flag value 0
            assert (level_flags == 0).sum() == 0
        else:
            # odd level: no node may carry the flag value 1
            assert (level_flags == 1).sum() == 0

    assert len(topo) % 2 == 0

    ret = []
    for level in topo:
        ret.append(level.cuda())
    torch.cuda.synchronize()
    elapsed = time.time() - time_s
    return ret, elapsed

def _safe_standardize(x):
    """Column-wise (x - mean) / std that never divides by a zero or NaN std.

    A column whose standard deviation is 0 (constant feature) or NaN
    (single-row input) is divided by 1 instead, so it comes out as all
    zeros rather than NaN.
    """
    std = x.std(axis=0)
    # `std > 0` is False for both 0 and NaN, so both fall back to 1
    std = torch.where(std > 0, std, torch.ones_like(std))
    return (x - x.mean(axis=0)) / std


data = {}
# data preprocessing: cast features to float32, move each graph to the GPU,
# collect node-index sets and topological levels, then clean and standardise
for design_name, des_gs in gs.items():
    for gi, g in enumerate(des_gs):
        g.nodes['node'].data['nf'] = g.nodes['node'].data['nf'].type(torch.float32)
        g.edges['cell_out'].data['ef'] = g.edges['cell_out'].data['ef'].type(torch.float32)
        g.edges['net_out'].data['ef'] = g.edges['net_out'].data['ef'].type(torch.float32)
        g.edges['net_in'].data['ef'] = g.edges['net_in'].data['ef'].type(torch.float32)
        # stack the four per-node timing quantities into one (N, 4) feature
        g.ndata['n_tsrf'] = torch.stack([g.ndata['n_tran'], g.ndata['n_slack'], g.ndata['n_risearr'], g.ndata['n_fallarr']], axis=1).type(torch.float32)

        g = g.to('cuda')

        # print(f'{design_name}, {gi+1}/{len(des_gs)}')
        topo, topo_time = gen_topo(g)

        # Boolean masks over the two flag columns of 'nf'
        # (column 0: primary-IO flag, column 1: 1 for fanout pins, 0 for fanin pins)
        prim_col = g.ndata['nf'][:, 0]
        dir_col = g.ndata['nf'][:, 1]
        is_prim, not_prim = prim_col > 0.5, prim_col < 0.5
        is_out, is_in = dir_col > 0.5, dir_col < 0.5

        ts = {'input_nodes': is_in.nonzero().flatten().type(torch.int32),
            'output_nodes': is_out.nonzero().flatten().type(torch.int32),
            'output_nodes_nonpi': torch.logical_and(is_out, not_prim).nonzero().flatten().type(torch.int32),
            'pi_nodes': torch.logical_and(is_out, is_prim).nonzero().flatten().type(torch.int32),
            'po_nodes': torch.logical_and(is_in, is_prim).nonzero().flatten().type(torch.int32),
            'endpoints': (g.ndata['n_is_end'] > 0.5).nonzero().flatten().type(torch.long),
            'topo': topo,
            'topo_time': topo_time}

        # set nans to zero
        g.ndata['nf'][torch.isnan(g.ndata['nf'])] = 0
        g.ndata['n_tsrf'][torch.isnan(g.ndata['n_tsrf'])] = 0

        # normalize; the two flag columns of 'nf' are left untouched
        g.ndata['nf'][:,2:] = _safe_standardize(g.ndata['nf'][:,2:])
        g.ndata['n_tsrf'] = _safe_standardize(g.ndata['n_tsrf'])

        data[f'{design_name}_{gi}'] = g, ts

        # just for report
        print(gi, design_name, len(g.ndata['nf']), g.ndata['train_mask'].sum().item(), len(g.edata['ef'][('node', 'cell_out', 'node')]), len(g.edata['ef'][('node', 'net_out', 'node')]), len(topo))

data_train = {k: t for k, t in data.items()}
data_test = data_train # set identical --- just testing if prediction training works!

0 NV_NVDLA_partition_m 83671 80415 54280 55966 86
1 NV_NVDLA_partition_m 265 30 15 249 16
2 NV_NVDLA_partition_m 458 390 264 298 16
0 NV_NVDLA_partition_p 273679 234273 171386 193361 82
1 NV_NVDLA_partition_p 5139 396 198 4940 26
0 ariane136 499589 454282 329085 352154 190
1 ariane136 17777 1167 583 17193 28
2 ariane136 3595 313 156 3438 26
0 mempool_tile_wrap 657377 617005 447458 468169 184
1 mempool_tile_wrap 9077 559 279 8797 22
2 mempool_tile_wrap 4163 51 33 4129 6
3 mempool_tile_wrap 903 102 67 835 8
4 mempool_tile_wrap 903 102 67 835 8


In [3]:
# Print the shape of every node / edge feature of `g`
# (the graph left over from the last iteration of the preprocessing loop).

# Column ranges of the 'nf' feature, as (label, [start, stop]) pairs
nf_layout = [
    ('is_prim IO', [0,1]),
    ('fanout(1) or in(0)', [1,2]),
    ('dis to tlrb', [2,6]),
    ('RF cap', [6,8]),
]

print('[node data] ( = dstdata)')
for nkey, ndat in g.ndata.items():
    assert type(ndat) is torch.Tensor, 'Type must be torch.Tensor'
    print(f'{nkey:22s} {ndat.shape}')
    if nkey != 'nf':
        continue
    nf = ndat
    # break the 'nf' tensor down into its labelled column groups
    for fkey, (col_lo, col_hi) in nf_layout:
        print(f'  {fkey:20s} {ndat[:, col_lo:col_hi].shape}')
print()

print('[edge data]')
for ekey, edat in g.edata.items():
    # each edge feature is a dict keyed by edge type
    assert type(edat) is dict, 'Type must be dict'
    print(f'{ekey}:')
    for etype_key, etype_tensor in edat.items():
        print(f'  {str(etype_key):30s} {etype_tensor.shape}')

[node data] ( = dstdata)
_ID                    torch.Size([903])
train_mask             torch.Size([903])
n_is_end               torch.Size([903])
n_fallarr              torch.Size([903])
n_slack                torch.Size([903])
n_risearr              torch.Size([903])
n_tran                 torch.Size([903])
nf                     torch.Size([903, 8])
  is_prim IO           torch.Size([903, 1])
  fanout(1) or in(0)   torch.Size([903, 1])
  dis to tlrb          torch.Size([903, 4])
  RF cap               torch.Size([903, 2])
n_tsrf                 torch.Size([903, 4])

[edge data]
_ID:
  ('node', 'cell_out', 'node')   torch.Size([67])
  ('node', 'net_in', 'node')     torch.Size([835])
  ('node', 'net_out', 'node')    torch.Size([835])
ef:
  ('node', 'cell_out', 'node')   torch.Size([67, 256])
  ('node', 'net_in', 'node')     torch.Size([835, 5])
  ('node', 'net_out', 'node')    torch.Size([835, 5])


In [4]:
# Persist the preprocessed datasets as pickles under ./data (relative to the
# current working directory). The directory is created first so a fresh
# checkout does not fail with FileNotFoundError.
# Note: data_test is the same object as data_train, so both files hold the same content.
os.makedirs('data', exist_ok=True)
with open('data/data_train.pk', 'wb') as pkf:
    pk.dump(data_train, pkf)
with open('data/data_test.pk', 'wb') as pkf:
    pk.dump(data_test, pkf)