In [1]:
%matplotlib inline

import numpy as np
import pickle

# Seed NumPy's global RNG so that the train/test split further down (which is
# called without an explicit random_state) is repeatable between runs.
# NOTE(review): this does not seed TensorFlow's own RNG — confirm whether the
# network's weight initialisation also needs to be reproducible.
np.random.seed(0)
from tensorflow.keras import optimizers

from sklearn.model_selection import train_test_split

In [2]:
# Path prefix used when checkpoints are written by the training loop.
WORKING_DIR = '/home/richard/Downloads/nn/PSU_back/tf'

# Fraction of the samples held out by train_test_split.
test_size = 0.2

# Training schedule: batch size handed to the data generators, number of
# epochs to loop over, and how often (in epochs) stats are collected.
batch_size = 2
n_epocs = 10000
epochs_per_stats = 1

# Optimiser hyper-parameters: a base rate of 0.002 scaled by batch_size / 1024.
# NOTE(review): neither value is referenced by any other cell visible in this
# notebook (the optimizer is built with its defaults) — confirm they are
# still needed.
momentum = 0.0
learning_rate = (batch_size / 1024.0) * 0.002

In [3]:
import torch
# Load the saved tensors as NumPy arrays. Index 0 along axis 1 of the boards
# is dropped straight away: per the original note that is the "player to move"
# channel, which is not wanted as a network input here.
boards = torch.load('/home/richard/Downloads/connect4_boards.pth').numpy()[:, 1:]
values = torch.load('/home/richard/Downloads/connect4_values.pth').numpy()

# Shuffled train/test partition; the held-out fraction is `test_size`.
board_train, board_test, value_train, value_test = train_test_split(
    boards,
    values,
    shuffle=True,
    test_size=test_size,
)

In [4]:
from connect4.neural.nn_tf import Connect4Sequence

# Wrap each split in a Connect4Sequence that serves batches of `batch_size`.
# NOTE(review): no shuffling is requested here; the original author believed
# per-epoch shuffling is applied elsewhere — confirm inside Connect4Sequence.
train_gen, test_gen = (
    Connect4Sequence(batch_size, split_boards, split_values)
    for split_boards, split_values in (
        (board_train, value_train),
        (board_test, value_test),
    )
)

In [5]:
from connect4.neural.nn_tf import value_net as net

# Configure the value network for training: mean-squared-error loss, an Adam
# optimizer built with its default settings, and 'accuracy' reported as a
# metric.
# NOTE(review): the `learning_rate` / `momentum` values from the config cell
# are not passed to the optimizer here — confirm that is intended.
net.compile(
    loss=['mean_squared_error'],
    metrics=['accuracy'],  # FIXME: investigate
    optimizer=optimizers.Adam(),
)

In [6]:
# Optionally resume from previously saved weights.
#
# `net` is the Keras model compiled above, so weights are restored with
# Keras' own `load_weights`; the PyTorch-style `load_state_dict` calls that
# used to live here do not exist on a Keras model, and they referenced an
# `optimizer` variable that is never defined in this notebook.
#
# Leave `file_path` as None to start from the current network, or point it at
# a weights file that Keras can read (i.e. one written by `net.save_weights`;
# PyTorch `.pth` state dicts cannot be loaded here). Example — note the
# explicit separator, since WORKING_DIR has no trailing slash:
# file_path = WORKING_DIR + '/../nn9.h5'
#
# NOTE(review): only `load_weights` is called; restoring optimizer state has
# not been ported — confirm whether it is needed.
file_path = None
if file_path is not None:
    net.load_weights(file_path)

In [None]:
# Loss and metric values of the network on the held-out generator before any
# training in this notebook. `Model.evaluate` accepts the generator directly;
# `evaluate_generator` is deprecated and has been removed from recent
# TensorFlow/Keras releases. verbose=0 keeps the cell output to just the
# returned values, matching evaluate_generator's silent default.
net.evaluate(test_gen, verbose=0)

In [None]:
from connect4.neural.stats import Stats

# Get an idea of how the initialisation is
def evaluate_fit(net, test_gen, device, output_stats=False):
    """Run ``net`` over every batch of ``test_gen`` with gradients disabled.

    The network is switched to evaluation mode via ``net.eval()`` and is left
    in that mode when the function returns.

    Args:
        net: PyTorch-style module; it must provide ``eval()`` and be callable
            on a batch of boards.
        test_gen: Iterable yielding ``(board, value)`` tensor pairs.
        device: Device handed to ``Tensor.to`` for every batch.
        output_stats: When true, per-batch outputs, targets and losses are
            fed to a ``Stats`` object which is then returned.

    Returns:
        The ``Stats`` instance when ``output_stats`` is true, otherwise
        ``None``.

    NOTE(review): the loss function is read from a module-level ``criterion``
    which is not assigned in any cell visible in this notebook — define it
    before calling. Also, ``output`` and ``value`` are handed to ``criterion``
    with whatever shapes the net / generator produce; if one is ``(N, 1)``
    and the other ``(N,)`` an element-wise loss would broadcast — confirm
    the shapes agree.
    """
    test_stats = Stats() if output_stats else None
    net = net.eval()

    with torch.no_grad():
        for board, value in test_gen:
            board, value = board.to(device), value.to(device)

            output = net(board)
            loss = criterion(output, value)

            if output_stats:
                # Stats receives flat NumPy arrays plus the batch loss.
                test_stats.update(
                    output.cpu().numpy().flatten(),
                    value.cpu().numpy().flatten(),
                    loss)
    return test_stats

# Baseline stats on the held-out generator, collected before training starts.
# NOTE(review): `device` is not assigned in any cell visible in this notebook,
# and `net` was configured through the Keras API above while evaluate_fit
# calls PyTorch-style methods on it — confirm this cell has been ported.
test_stats = evaluate_fit(net, test_gen, device, output_stats=True)
print("Initial Test Stats:\n", test_stats)

In [None]:
%%time

from laplotter import LossAccPlotter
from visdom import Visdom

import os

viz = Visdom()
plotter = LossAccPlotter()

# Checkpoints are written inside WORKING_DIR, so make sure it exists.
os.makedirs(WORKING_DIR, exist_ok=True)

# NOTE(review): this loop drives `net` through PyTorch-style calls (train(),
# backward(), state_dict()) and reads `optimizer`, `criterion` and `device`,
# none of which are assigned in a cell visible in this notebook, while `net`
# was compiled through the Keras API earlier — confirm the loop has been
# ported before relying on it.
for epoch in range(n_epocs):
    # Stats are gathered only on every `epochs_per_stats`-th epoch; the
    # condition does not change within an epoch, so evaluate it once.
    collect_stats = epoch % epochs_per_stats == 0

    net = net.train()
    train_stats = Stats()

    for board, value in train_gen:
        board, value = board.to(device), value.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        output = net(board)
        loss = criterion(output, value)
        loss.backward()
        optimizer.step()

        if collect_stats:
            output = output.cpu().view(-1).detach().numpy()
            value = value.cpu().view(-1).numpy()
            train_stats.update(output, value, loss)

    # validate (returns None on epochs where stats are not collected)
    test_stats = evaluate_fit(net, test_gen, device, collect_stats)

    if test_stats is not None:
        print("Epoch:  ", epoch, "\nTrain:\n", train_stats, "\nTest:\n", test_stats)
        plotter.add_values(epoch,
                           loss_train=train_stats.loss, acc_train=train_stats.accuracy,
                           loss_val=test_stats.loss, acc_val=test_stats.accuracy)
        # The first call creates the visdom window; later calls redraw into it.
        if epoch == 0:
            win = viz.matplot(plotter.fig)
        else:
            viz.matplot(plotter.fig, win=win)

    # One checkpoint per epoch, named "<epoch>-<batch_size>.pth" inside
    # WORKING_DIR. Plain string concatenation dropped the path separator and
    # wrote the files next to the directory instead of into it.
    torch.save({
        'net_state_dict': net.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()},
        os.path.join(WORKING_DIR, str(epoch) + '-' + str(batch_size) + '.pth'))

print('Finished Training')

In [None]:
# Display the plotter's figure as this cell's output.
plotter.fig