In [24]:
%matplotlib inline

import numpy as np
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

from copy import copy
from sklearn.model_selection import train_test_split

# https://discuss.pytorch.org/t/output-of-resnet34-network-depends-on-the-batch-size/21647
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.manual_seed(999)
np.random.seed(0)

In [19]:
n_epocs = 10000
epochs_per_stats = 1
batch_size = 4096
test_size = 0.2
# learning_rate = 0.002 * (batch_size / 1024.0)
learning_rate = 0.01
momentum = 0.9

WORKING_DIR = '/home/richard/Downloads/nn/PSU_back/'

In [48]:
with open('/home/richard/data/connect4/7ply_boards.pkl', 'rb') as f:
    boards = pickle.load(f)
with open ('/home/richard/data/connect4/7ply_values.pkl', 'rb') as f:
    values = pickle.load(f)
with open('/home/richard/data/connect4/7ply_priors.pkl', 'rb') as f:
    priors = pickle.load(f)

priors = list(map(lambda x: x / np.sum(x) if np.sum(x) > 0.0 else np.zeros((7,)), priors))

In [4]:
from connect4.neural.nn_pytorch import Connect4Dataset

train = Connect4Dataset(boards, values, priors, to_move_channel=False)

train_gen = data.DataLoader(train, batch_size, shuffle=True)

Creating dataset with 27989 positions


In [5]:
from connect4.neural.nn_pytorch import build_policy_net

net = build_policy_net()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)

Sequential(
  (0): Sequential(
    (0): Conv2d(2, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
  )
  (1): Sequential(
    (0): ResidualLayer(
      (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (batch_norm2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): LeakyReLU(negative_slope=0.01)
    )
    (1): ResidualLayer(
      (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm1): BatchNorm2d(32, eps=1e-05, momentum=0

In [6]:
# criterion = nn.CrossEntropyLoss()
criterion = nn.BCELoss()

optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
# optimizer = optim.Adam(net.parameters())

In [7]:
%%time

for epoch in range(n_epocs):
    net = net.train()
    
    for board, _, prior in train_gen:
        board, prior = board.to(device), prior.to(device)
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        output = net(board)

        loss = criterion(output, prior)
        loss.backward()
        optimizer.step()
        
    # print last output
    print("epoch {}, loss: {}".format(epoch, loss))
    print(output[0], prior[0])
            
print('Finished Training')

  input = module(input)


epoch 0, loss: 0.4080538749694824
tensor([0.1737, 0.1630, 0.1086, 0.1353, 0.1210, 0.1561, 0.1424],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([1., 0., 0., 0., 0., 0., 0.], device='cuda:0')
epoch 1, loss: 0.4071711301803589
tensor([0.1659, 0.1352, 0.1442, 0.1401, 0.1240, 0.1465, 0.1442],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.2500, 0.2500, 0.0000, 0.0000, 0.2500, 0.0000, 0.2500],
       device='cuda:0')
epoch 2, loss: 0.40730640292167664
tensor([0.1581, 0.1441, 0.1493, 0.1252, 0.1428, 0.1450, 0.1355],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.5000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.5000],
       device='cuda:0')
epoch 3, loss: 0.4067128300666809
tensor([0.1730, 0.1375, 0.1076, 0.1270, 0.1215, 0.1525, 0.1809],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.2500, 0.2500, 0.0000, 0.0000, 0.2500, 0.0000, 0.2500],
       device='cuda:0')
epoch 4, loss: 0.40575361251831055
tensor([0.1795, 0.1296, 0.1213, 0.1263, 0.1374,

epoch 36, loss: 0.3994678258895874
tensor([0.2098, 0.1147, 0.1139, 0.0806, 0.1227, 0.1638, 0.1944],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0., 0., 0., 0., 0., 1., 0.], device='cuda:0')
epoch 37, loss: 0.39910924434661865
tensor([0.1952, 0.1133, 0.1292, 0.1097, 0.1198, 0.1385, 0.1943],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0., 0., 0., 0., 0., 0., 1.], device='cuda:0')
epoch 38, loss: 0.39938247203826904
tensor([0.2511, 0.1259, 0.1013, 0.0938, 0.1019, 0.1435, 0.1825],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.3333, 0.3333, 0.0000, 0.0000, 0.0000, 0.3333, 0.0000],
       device='cuda:0')
epoch 39, loss: 0.40032079815864563
tensor([0.2063, 0.1383, 0.1056, 0.1109, 0.1261, 0.1381, 0.1747],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.0000, 0.5000, 0.0000, 0.0000, 0.0000, 0.0000, 0.5000],
       device='cuda:0')
epoch 40, loss: 0.39898040890693665
tensor([0.2031, 0.1344, 0.1184, 0.1309, 0.1011, 0.1215, 0.1907],
       dev

epoch 72, loss: 0.39137837290763855
tensor([0.2023, 0.1272, 0.0907, 0.0684, 0.1076, 0.2492, 0.1547],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.3333, 0.0000, 0.0000, 0.0000, 0.0000, 0.3333, 0.3333],
       device='cuda:0')
epoch 73, loss: 0.39160653948783875
tensor([0.3189, 0.1360, 0.0842, 0.0687, 0.0974, 0.1037, 0.1912],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.3333, 0.3333, 0.0000, 0.0000, 0.0000, 0.0000, 0.3333],
       device='cuda:0')
epoch 74, loss: 0.3921215534210205
tensor([0.2125, 0.1471, 0.1093, 0.0926, 0.0869, 0.1695, 0.1820],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.1429, 0.1429, 0.1429, 0.1429, 0.1429, 0.1429, 0.1429],
       device='cuda:0')
epoch 75, loss: 0.38936153054237366
tensor([0.1926, 0.1052, 0.0897, 0.1123, 0.1456, 0.1393, 0.2153],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.5000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.5000],
       device='cuda:0')
epoch 76, loss: 0.39060455560684204
tenso

epoch 107, loss: 0.38491612672805786
tensor([0.1532, 0.1497, 0.0959, 0.1496, 0.1483, 0.1525, 0.1508],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.1429, 0.1429, 0.1429, 0.1429, 0.1429, 0.1429, 0.1429],
       device='cuda:0')
epoch 108, loss: 0.38410696387290955
tensor([0.1854, 0.0988, 0.2254, 0.0535, 0.0907, 0.1707, 0.1754],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.0000, 0.0000, 0.3333, 0.0000, 0.3333, 0.0000, 0.3333],
       device='cuda:0')
epoch 109, loss: 0.3848850727081299
tensor([0.2068, 0.0976, 0.1459, 0.0855, 0.1369, 0.1180, 0.2092],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.1429, 0.1429, 0.1429, 0.1429, 0.1429, 0.1429, 0.1429],
       device='cuda:0')
epoch 110, loss: 0.3833158314228058
tensor([0.1439, 0.1056, 0.2788, 0.0682, 0.1281, 0.0965, 0.1790],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0., 0., 1., 0., 0., 0., 0.], device='cuda:0')
epoch 111, loss: 0.3852848410606384
tensor([0.1968, 0.1689, 0.0716, 0.097

epoch 143, loss: 0.37913817167282104
tensor([0.3008, 0.3052, 0.0831, 0.0307, 0.0497, 0.0797, 0.1508],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([1., 0., 0., 0., 0., 0., 0.], device='cuda:0')
epoch 144, loss: 0.38019686937332153
tensor([0.1528, 0.1268, 0.1622, 0.1372, 0.1546, 0.1185, 0.1480],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.1429, 0.1429, 0.1429, 0.1429, 0.1429, 0.1429, 0.1429],
       device='cuda:0')
epoch 145, loss: 0.380321204662323
tensor([0.1324, 0.1721, 0.1648, 0.1367, 0.1342, 0.1284, 0.1315],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.1429, 0.1429, 0.1429, 0.1429, 0.1429, 0.1429, 0.1429],
       device='cuda:0')
epoch 146, loss: 0.3783697485923767
tensor([0.1563, 0.1439, 0.1276, 0.0767, 0.1537, 0.1209, 0.2210],
       device='cuda:0', grad_fn=<SelectBackward>) tensor([0.0000, 0.2500, 0.2500, 0.0000, 0.2500, 0.2500, 0.0000],
       device='cuda:0')
epoch 147, loss: 0.37918534874916077
tensor([0.2935, 0.1108, 0.0906, 0.036

KeyboardInterrupt: 