In [5]:
import torch

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [6]:
import copy
import os
import random
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from tools.dataset import Dataset
from tools.common import Accumulator
from model.MGN import MGN
from config.config import load_train_config

# Restrict CUDA to the first GPU.
# NOTE(review): torch is already imported (and CUDA already queried in an earlier
# cell) by the time this runs; CUDA_VISIBLE_DEVICES is normally only honoured if it
# is set before CUDA is initialised -- confirm this assignment still takes effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
# Display whether PyTorch can see a CUDA device (the cell's value is its output).
torch.cuda.is_available()

True

In [7]:
def accumulate(model, dataloader, config):
    """Run one pass over ``dataloader`` and install the gathered statistics on the model.

    One ``Accumulator`` is created for each of the node, edge and output features,
    sized from ``config["model"]``. Every batch's ``nodes``, ``edges`` and ``output``
    tensors are moved to the GPU and fed to the matching accumulator. After the pass
    the accumulators are handed to ``model.node_normalizer``,
    ``model.edge_normalizer`` and ``model.output_normalizer`` via ``set_accumulated``.

    Args:
        model: Object exposing the three normalizers listed above.
        dataloader: Iterable yielding 6-tuples
            ``(senders, receivers, nodes, edges, output, path)``; only the three
            feature tensors are used here.
        config: Mapping whose ``"model"`` entry holds ``"node_feat_size"``,
            ``"edge_feat_size"`` and ``"output_feat_size"``.
    """
    model_cfg = config["model"]
    node_stats = Accumulator(model_cfg["node_feat_size"])
    edge_stats = Accumulator(model_cfg["edge_feat_size"])
    output_stats = Accumulator(model_cfg["output_feat_size"])

    for _senders, _receivers, nodes, edges, output, _path in dataloader:
        # Transfer the three feature tensors to the GPU before accumulating.
        nodes, edges, output = (tensor.cuda() for tensor in (nodes, edges, output))

        node_stats.accumulate(nodes)
        edge_stats.accumulate(edges)
        output_stats.accumulate(output)

    # Publish the collected statistics to the model's normalizers.
    model.node_normalizer.set_accumulated(node_stats)
    model.edge_normalizer.set_accumulated(edge_stats)
    model.output_normalizer.set_accumulated(output_stats)


def train(model, train_dataloader, valid_dataloader, criterion, optimizer, scheduler, config):
    """Train ``model``, validating and checkpointing it at configured intervals.

    Before the first epoch, ``accumulate`` is run over ``train_dataloader`` to set
    up the model's normalizers. For every epoch the average training loss and MSE
    are printed and appended to ``<config['log_root']>/log.txt``. Every
    ``config['eval_steps']`` epochs the model is evaluated on ``valid_dataloader``,
    and every ``config['save_steps']`` epochs its ``state_dict`` is saved to
    ``<config['ckpt_root']>/<epoch>.pkl``.

    Args:
        model: Module called as ``model(senders, receivers, nodes, edges)`` that
            also exposes ``output_normalizer`` and ``output_normalize_inverse``.
        train_dataloader: Iterable of
            ``(senders, receivers, nodes, edges, output, _)`` training batches.
        valid_dataloader: Iterable of validation batches with the same layout.
        criterion: Loss applied to the prediction and the normalized target.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler, also stepped once per training batch.
        config: Mapping providing ``'log_root'``, ``'ckpt_root'``, ``'last_epoch'``,
            ``'max_epoch'``, ``'eval_steps'`` and ``'save_steps'``; it is also
            forwarded to ``accumulate``.

    Returns:
        None.
    """
    log_path = os.path.join(config['log_root'], 'log.txt')

    # The context manager guarantees the log is flushed and closed even when
    # training is interrupted by an exception.
    with open(log_path, 'a') as log:
        accumulate(model, train_dataloader, config)

        for epoch in range(config['last_epoch'] + 1, config['max_epoch'] + 1):
            print('-' * 20)
            log.write('-' * 20 + '\n')

            epoch_header = 'Epoch: %d / %d' % (epoch, config['max_epoch'])
            print(epoch_header)
            log.write(epoch_header + '\n')

            # ---- training pass ----
            epoch_loss = 0.
            epoch_mse = 0.
            model.train()
            start = time.perf_counter()
            for senders, receivers, nodes, edges, output, _ in train_dataloader:
                senders = senders.cuda()
                receivers = receivers.cuda()
                nodes = nodes.cuda()
                edges = edges.cuda()
                output = output.cuda()

                optimizer.zero_grad()

                prediction = model(senders, receivers, nodes, edges)
                loss = criterion(prediction, model.output_normalizer(output))
                loss.backward()
                epoch_loss += loss.item()
                # Track the MSE as a plain float computed without autograd, so the
                # running total does not keep every batch's graph alive.
                with torch.no_grad():
                    batch_mse = torch.mean((output - model.output_normalize_inverse(prediction)) ** 2)
                epoch_mse += batch_mse.item()

                optimizer.step()
                scheduler.step()

            end = time.perf_counter()
            n_train = len(train_dataloader)
            train_summary = 'Train Loss: %f, MSE: %f' % (epoch_loss / n_train, epoch_mse / n_train)
            print(train_summary)
            print('Train Time: %f' % (end - start))
            log.write(train_summary + '\n')

            # ---- validation pass ----
            if epoch % config['eval_steps'] == 0:
                epoch_loss = 0.
                epoch_mse = 0.
                epoch_time = 0.

                model.eval()
                for senders, receivers, nodes, edges, output, _ in valid_dataloader:
                    senders = senders.cuda()
                    receivers = receivers.cuda()
                    nodes = nodes.cuda()
                    edges = edges.cuda()
                    output = output.cuda()

                    with torch.no_grad():
                        # CUDA kernels launch asynchronously; synchronize on both
                        # sides so the timer covers the forward pass itself.
                        torch.cuda.synchronize()
                        start = time.perf_counter()
                        prediction = model(senders, receivers, nodes, edges)
                        torch.cuda.synchronize()
                        end = time.perf_counter()

                        loss = criterion(prediction, model.output_normalizer(output))

                        epoch_loss += loss.item()
                        epoch_mse += torch.mean((output - model.output_normalize_inverse(prediction)) ** 2).item()
                        epoch_time += end - start

                n_valid = len(valid_dataloader)
                print('Valid Loss: %f, MSE: %f, Time Used: %f' % (
                    epoch_loss / n_valid, epoch_mse / n_valid, epoch_time / n_valid))
                log.write('Valid Loss: %f, MSE: %f' % (epoch_loss / n_valid, epoch_mse / n_valid) + '\n')

            print('-' * 20)
            log.write('-' * 20 + '\n')
            # Make progress visible on disk while a long run is still going.
            log.flush()

            if epoch % config['save_steps'] == 0:
                # torch.save serializes the tensors itself, so no deep copy of the
                # state dict is needed first.
                torch.save(model.state_dict(), os.path.join(config['ckpt_root'], '%d.pkl' % epoch))

In [8]:
config = load_train_config()

# Seed both Python's and PyTorch's RNGs: model weight initialization draws from
# torch's generator, so seeding `random` alone does not make the run repeatable.
# NOTE(review): if the dataset split relies on another RNG (e.g. NumPy), that one
# would need seeding as well -- confirm in tools/dataset.py.
random.seed(config['seed'])
torch.manual_seed(config['seed'])

# Build the network from the model section of the config and move it to the GPU.
# The `.cuda()` call is kept last so the cell still displays the module summary.
model = MGN(config['model'])
model.cuda()

MGN(
  (node_normalizer): Normalizer()
  (edge_normalizer): Normalizer()
  (output_normalizer): Normalizer()
  (epd_component): EncodeProcessDecode(
    (encoder): Encoder(
      (node_mlp): MLP(
        (net): Sequential(
          (0): Linear(in_features=3, out_features=64, bias=True)
          (1): ReLU()
          (2): Linear(in_features=64, out_features=64, bias=True)
          (3): ReLU()
          (4): Linear(in_features=64, out_features=64, bias=True)
          (5): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        )
      )
      (edge_mlp): MLP(
        (net): Sequential(
          (0): Linear(in_features=4, out_features=64, bias=True)
          (1): ReLU()
          (2): Linear(in_features=64, out_features=64, bias=True)
          (3): ReLU()
          (4): Linear(in_features=64, out_features=64, bias=True)
          (5): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        )
      )
    )
    (process): Process(
      (blocks): ModuleList(
        (0): Gr

In [8]:
# Datasets: parts 0-5 of 7 for training, part 6 for validation, ids restricted to 'JOB1'.
# (Presumably `parts` selects partitions out of `npart` -- see tools/dataset.py.)
train_dataset = Dataset(
    config['dataset'],
    parts=[0, 1, 2, 3, 4, 5],
    npart=7,
    ids=['JOB1'],
)
valid_dataset = Dataset(
    config['dataset'],
    parts=[6],
    npart=7,
    ids=['JOB1'],
)

# Loaders: identical settings for both splits.
train_dataloader = data.DataLoader(
    train_dataset,
    batch_size=config['batch_size'],
    num_workers=4,
    pin_memory=True,
)
valid_dataloader = data.DataLoader(
    valid_dataset,
    batch_size=config['batch_size'],
    num_workers=4,
    pin_memory=True,
)

# Objective, optimizer and exponentially decaying learning-rate schedule.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=config['lr'])
scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=config['decayRate'])

# Launch training.
train(
    model,
    train_dataloader,
    valid_dataloader,
    criterion,
    optimizer,
    scheduler,
    config,
)

[142 195  67 131 196  63  72  66 150 105 181   8   4  69  12 191 178 172
 102  34 188 101  62  32  49  24 136  82 112 123 122  22  43 118 164 169
  15 171  27 121  51 175 152  79 120  71  35 138  17 174 165 187  85 180
  73 119 103 194  99  57 156 115 173 182  98 166 143  56  92 104 141 151
  54  97  77 126  38  87  86  80 197  50 140  76  96   5  84 135  47 155
 154  30 199 184  40  28 117 125 192 149 158  74  68  13 186 130 185  41
  81  70 111 168  60  42  46 163   3 189  33 148  44  61  14 177 176 137
 167 139 183  94  75   1 179  52   0  53  36 147  93  59   2  45  89  88
 153 190  31  18  90  20  64  83 127 157 159 110 114   7  39 162  25  16
 106  26  21 108 144 109]
168
['data/CarModel\\Model164.npz' 'data/CarModel\\Model225.npz'
 'data/CarModel\\Model082.npz' 'data/CarModel\\Model152.npz'
 'data/CarModel\\Model226.npz' 'data/CarModel\\Model078.npz'
 'data/CarModel\\Model087.npz' 'data/CarModel\\Model081.npz'
 'data/CarModel\\Model172.npz' 'data/CarModel\\Model122.npz'
 'data/C

ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "C:\Users\86187\AppData\Roaming\Python\Python310\site-packages\torch\utils\data\_utils\worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "C:\Users\86187\AppData\Roaming\Python\Python310\site-packages\torch\utils\data\_utils\fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "C:\Users\86187\AppData\Roaming\Python\Python310\site-packages\torch\utils\data\_utils\fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "C:\Users\86187\Desktop\graduate\autoMGN\autoMGN\tools\dataset.py", line 46, in __getitem__
    shape_id, loads_id, load_index = unpack_filename(path)
ValueError: not enough values to unpack (expected 3, got 2)


In [7]:
# Rebuild only the training dataset (parts 0-5 of 7, ids restricted to 'JOB1').
train_dataset = Dataset(
    config['dataset'],
    parts=[0, 1, 2, 3, 4, 5],
    npart=7,
    ids=['JOB1'],
)

[ 13 118  84  39 123 195 141  27  72  44 186  14 164 183  69 136 130 154
  12  34  89  40  17 181  35 150 180 119  36  99  79 104 131 147   6  78
  45 116  80 151 174 194 171  43 106  70  31  85 188 198 121  59 175  46
  18 189  93  41 113   3  62 135  24  52  71  33  63 109 158  32 148 105
  23   0 140 166  75 168  61 156  88  68 120 145  54 129  76 112  86  11
  73   4  95  67 122  64  47  58  83 197  20 170  57  97   1 146 169 165
  28 162   5 111 172 155 102   8  60 184  90 167 128 182  48 107  19 185
  77 157 143   9 138  81  87 187 178 161 137 159  82 126 133 176 103 153
 124 152  38 139  92 196 117 191  26  37 177  10  53  65 101 127  30  51
 115 134   7 193  29  91]
168
['data/CarModel\\Model016.npz' 'data/CarModel\\Model136.npz'
 'data/CarModel\\Model099.npz' 'data/CarModel\\Model050.npz'
 'data/CarModel\\Model142.npz' 'data/CarModel\\Model225.npz'
 'data/CarModel\\Model163.npz' 'data/CarModel\\Model035.npz'
 'data/CarModel\\Model087.npz' 'data/CarModel\\Model055.npz'
 'data/C

In [9]:
# Run the normalizer-statistics pass on its own.
# The log file is opened through a context manager so the handle is closed when
# the cell finishes (or fails) instead of being leaked into the kernel namespace.
with open(os.path.join(config['log_root'], 'log.txt'), 'a') as log:
    accumulate(model, train_dataloader, config)

RuntimeError: DataLoader worker (pid(s) 13860, 12736, 3340, 21840) exited unexpectedly

In [9]:
# Rebuild only the training dataset (parts 0-5 of 7, ids restricted to 'JOB1').
train_dataset = Dataset(
    config['dataset'],
    parts=[0, 1, 2, 3, 4, 5],
    npart=7,
    ids=['JOB1'],
)


[131 181  22 172 144  92  97 187  58  93   6  70 106  68 153 168 179 199
  29  46   9 142 134  88 193 110  26  32 117 112  17  39 166  13  94 138
 109 147  51 101  59 188 116   5 170  99 100 167 180 146  65   1 104  43
  38 184 123 171 137 162  71  44  95 174  12   7  54 152  21  47  28 176
  34   2 132 118  42 189 150  14 165  41 192  45  82 128  63  57 197 160
  53  75 108 135 121 159 183  67 169  50  87  69  89 196 115  19 148  96
  86  11   8  60  33 173  78   4 119 105 182 127 177  30 186  40  49 178
  76 157 161  73 164 151  31  74 191  27 125 198  81  20 155 114 139  36
  61  56 145  48  16  83  62  85 126   0 102  23   3 140  15 195 133 113
 190 141  52 163 156  80]
168
['data/CarModel\\Model152.npz' 'data/CarModel\\Model211.npz'
 'data/CarModel\\Model027.npz' 'data/CarModel\\Model201.npz'
 'data/CarModel\\Model166.npz' 'data/CarModel\\Model109.npz'
 'data/CarModel\\Model114.npz' 'data/CarModel\\Model217.npz'
 'data/CarModel\\Model072.npz' 'data/CarModel\\Model110.npz'
 'data/C

In [12]:
# Display the class of `train_dataset` (the cell's value is its output).
type(train_dataset)

tools.dataset.Dataset

In [21]:
# Fetch a single sample directly, without a DataLoader.
# NOTE(review): this raises the same ValueError as the DataLoader workers; the
# worker traceback places it at the `unpack_filename(path)` unpacking inside
# `Dataset.__getitem__` (tools/dataset.py), so the fix belongs there, not here.
train_dataset[10]

ValueError: not enough values to unpack (expected 3, got 2)