In [4]:
import glob
import time
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm as tq
from os import scandir
from torch.utils.tensorboard import SummaryWriter


In [70]:
# %load util.py
import torch
from torch.utils.data import Dataset
import sys
import random
import os
import numpy as np
from datetime import datetime as dt
import glob
from collections import Counter
from sgfmill import sgf
from sgfmill import boards
from sgfmill import ascii_boards
from tqdm import tqdm as tq

class GoDataset(Dataset):
  # pos_paths: list of paths to position features
  # labels: list of moves played in a given position
  def __init__(self, pos_paths, label_paths):
        self.label_paths = label_paths
        self.pos_paths = pos_paths

  def __len__(self):
        return len(self.pos_paths)

  def __getitem__(self, idx):
        pos = torch.load(self.pos_paths[idx])
        correctClass= torch.load(self.label_paths[idx])
        label = correctClass[0] * 19 + correctClass[1]
        return pos, label

def getPaths(prefix):
    fullDataPaths = []
    fullLabelPaths = []
    for rank in ['5k', '4k', '3k', '2k', '1k', '1d', '2d', '3d', '4d']:
        data = glob.glob(prefix + "data/" + rank + "/*.pt")
        labels = glob.glob(prefix + "labels/" + rank + "/*.pt")
        for i in tq(range(len(data))):
            assert data[i].rsplit('/', 1)[1] == labels[i].rsplit('/', 1)[1]
        fullDataPaths.extend(data)
        fullLabelPaths.extend(labels)
    return fullDataPaths, fullLabelPaths

def getCorrectCount(pred, y):
    # pred: batch_size x 361
    # y: batch_size x 361
    return (torch.argmax(pred, dim = 1) == torch.argmax(pred, y, dim = 1)).sum()

def buildFeatures(positions):
    # input: 19 x 19 x 3 input: my stones, their stones, empty spots
    # output: 19 x 19 x 32: 
    # 0 - 7 liberties 
    # 8 - 15 self capture size
    # 16 - 23 capture size
    # 24 - 31 liberties after move
    # candidates: open 1 liberty slots for opponent stones
    n = positions.shape[0]

    
    visited = positions[:, :, 2] - 1
    output = torch.zeros((n, n, 32))
    candidates = []
    libertyMap = dict()
    stoneMap = dict()
    colorMap = dict()
    
    def getLiberties(x, y, color, placeholder):
        firstLiberty = None
        liberties = 0
        stoneCount = 1
        visited[x][y] = placeholder
        visited2[x][y] = 1
        if x > 0:
            if visited2[x - 1][y] == 0:
                visited2[x - 1][y] = 1
                if positions[x - 1][y][2] == 1:
                    if liberties == 0:
                        firstLiberty = [x - 1, y]
                    liberties += 1
                elif positions[x - 1][y][color] == 1:
                    l, s, fl = getLiberties(x - 1, y, color, placeholder)
                    if liberties == 0:
                        firstLiberty = fl
                    liberties += l
                    stoneCount += s
        if x < n - 1:
            if visited2[x + 1][y] == 0:
                visited2[x + 1][y] = 1
                if positions[x + 1][y][2] == 1:
                    if liberties == 0:
                        firstLiberty = [x + 1, y]
                    liberties += 1
                elif positions[x + 1][y][color] == 1:
                    l, s, fl = getLiberties(x + 1, y, color, placeholder)
                    if liberties == 0:
                        firstLiberty = fl
                    liberties += l
                    stoneCount += s
        if y > 0:
            if visited2[x][y - 1] == 0:
                visited2[x][y - 1] = 1
                if positions[x][y - 1][2] == 1:
                    if liberties == 0:
                        firstLiberty = [x, y - 1]
                    liberties += 1
                elif positions[x][y - 1][color] == 1:
                    l, s, fl = getLiberties(x, y - 1, color, placeholder)
                    if liberties == 0:
                        firstLiberty = fl
                    liberties += l
                    stoneCount += s
        if y < n - 1:
            if visited2[x][y + 1] == 0:
                visited2[x][y + 1] = 1
                if positions[x][y + 1][2] == 1:
                    if liberties == 0:
                        firstLiberty = [x, y + 1]
                    liberties += 1
                elif positions[x][y + 1][color] == 1:
                    l, s, fl = getLiberties(x, y + 1, color, placeholder)
                    if liberties == 0:
                        firstLiberty = fl
                    liberties += l
                    stoneCount += s
        
        return liberties, stoneCount, firstLiberty
    
    counter = 1
    for i in range(n):
        for j in range(n):
            # visited: -1 if stone is there, 0 if no stone there, counter if visited + stone present
            if visited[i][j] == -1: 
                # visited2: for each loop through connected stones, 1 if visited or liberty has been checked
                visited2 = torch.zeros_like(visited) 
                color = int(positions[i][j][1] == 1) # 0 if your stone, 1 if opponent stone
                l, s, fl = getLiberties(i, j, color, counter)
                libertyMap[counter] = l
                stoneMap[counter] = s
                colorMap[counter] = color # 0 = my color, 1 = their color
                if l == 1 and color == 1:
                    candidates.append((fl[0], fl[1])) # simpleKo candidate: may not be legal move for me to capture
                if l == 1:
                    output[fl[0]][fl[1]][min(s, 8) - 1 + 8 * (color + 1)] = 1 # mark self captures / captures
                counter += 1
    for i in range(n):
        for j in range(n):
            x = int(visited[i][j].item())
            if x == 0: # mark liberties after move
                if i > 0:
                    key = int(visited[i - 1][j].item())
                    if key != 0 and colorMap[key] == 1:
                        l = libertyMap[key]
                        output[i][j][min(l, 8) + 23] = 1
                if i < n - 1:
                    key = int(visited[i + 1][j].item())
                    if key != 0 and colorMap[key] == 1:
                        l = libertyMap[key]
                        output[i][j][min(l, 8) + 23] = 1
                if j > 0:
                    key = int(visited[i][j - 1].item())
                    if key != 0 and colorMap[key] == 1:
                        l = libertyMap[key]
                        output[i][j][min(l, 8) + 23] = 1
                if j < n - 1:
                    key = int(visited[i][j + 1].item())
                    if key != 0 and colorMap[key] == 1:
                        l = libertyMap[key]
                        output[i][j][min(l, 8) + 23] = 1
            else:
                l = libertyMap[x]
                output[i][j][min(l, 8) - 1] = 1 # mark liberty counts for each stone location
            
    return output, candidates 

# 55 features
# 0-2: stone positions
# 3-4: all 0's, all 1's
# 5-12: turn history
# 13 - 36: liberties, self captures, captures
# 37 - 44: liberties after move
# 45: simple ko constraint
# 46 - 54: one hot encoding for rank
def gameToFeatures(game):
    swapColor = {'w': 'b', 'b': 'w'}
    root_node = game.get_root()
    b = boards.Board(19)
    
    rankOneHot = None
    for rankInd, rank in enumerate(['5çº§', '4çº§', '3çº§', '2çº§', '1çº§', '1æ®µ', '2æ®µ', '3æ®µ', '4æ®µ']):        
        if root_node.get("BR") == rank and root_node.get("WR") == rank:
            assert rankOneHot == None
            rankOneHot = rankInd
    assert rankOneHot != None
    
    features = []
    labels = []
    counter = 0
    for node in game.get_main_sequence():
        color, move = node.get_move()
        #print(color, move)
        feature = torch.zeros(19, 19, 55)
        if color == None:
            feature[:, :, 2] = 1
            feature[:, :, 4] = 1
            feature[:, :, 46 + rankOneHot] = 1
        else:
            labels.append([move[0], move[1]])
            b.play(move[0], move[1], color)
            for c, p in b.list_occupied_points():
                if c != color:
                    # my color: c / their color: color (last move made)
                    feature[p[0], p[1], 0] = 1
                else:
                    feature[p[0], p[1], 1] = 1
            feature[:, :, 2] = (feature[:, :, 0] + feature[:, :, 1]) == 0
            feature[:, :, 4] = 1
            feature[move[0], move[1], 5] = 1
            # moves 1-7 history from last feature => 2-8 history of current feature
            feature[:, :, 6:13] = features[-1][:, :, 5:12] 
            feature[:, :, 13:45], candidates = buildFeatures(feature[:, :, :3])
            feature[:, :, 45] = checkSimpleKo(oldb, b, candidates, swapColor[color])
            feature[:, :, 46 + rankOneHot] = 1
        features.append(feature)
        counter += 1
        oldb = b.copy()
    return features[:-1], labels

def checkSimpleKo(past, present, candidates, color):
    n = 19
    output = torch.zeros((n, n))
    for x, y in candidates:
        variation = present.copy()
        try: 
            variation.play(x, y, color)
            if variation.list_occupied_points() == past.list_occupied_points():
                output[x][y] = 1
        except Exception:
            pass
    return output

def filterGame(game, rank):
    board_size = game.get_size()
    if board_size != 19:
        return False
    root_node = game.get_root()
    if root_node.get("BR") != rank:
        return False
    if root_node.get("WR") != rank:
        return False
    if root_node.get("RU") != "Japanese":
        return False
    if root_node.get("TM") != 600:
        return False
    if root_node.get("KM") != 0:
        return False
    if dt.strptime(root_node.get("DT"), '%Y-%m-%d').year != 2017:
        return False
    return True 

In [7]:
'''writer = SummaryWriter(log_dir = "logs/test")
x = range(100)
for i in x:
    writer.add_scalar('y=2x', i * 2, i)
writer.close()

In [11]:
ranks = ['5k', '4k', '3k', '2k', '1k', '1d', '2d', '3d', '4d'] 

In [7]:
rank = ranks[0]
data = glob.glob("../cleanedGoData/train/data/" + rank + "/*.pt")
labels = glob.glob("../cleanedGoData/train/labels/" + rank + "/*.pt")

In [8]:
print(len(data), len(labels))

319964 319964


In [12]:
fullDataPaths = []
fullLabelPaths = []
for rank in ranks:
    data = glob.glob("../cleanedGoData/train/data/" + rank + "/*.pt")
    labels = glob.glob("../cleanedGoData/train/labels/" + rank + "/*.pt")
    for i in tq(range(len(data))):
        assert data[i].rsplit('/', 1)[1] == labels[i].rsplit('/', 1)[1]
    print(rank, len(data), len(labels))
    fullDataPaths.extend(data)
    fullLabelPaths.extend(labels)

 37%|███▋      | 119127/319964 [00:00<00:00, 1191214.33it/s]

../cleanedGoData/train/data/5k/316088.pt


100%|██████████| 319964/319964 [00:00<00:00, 1269923.82it/s]


5k 319964 319964


 79%|███████▉  | 251931/319088 [00:00<00:00, 1276348.46it/s]

../cleanedGoData/train/data/4k/316088.pt


100%|██████████| 319088/319088 [00:00<00:00, 1268491.19it/s]


4k 319088 319088


 22%|██▏       | 70161/322751 [00:00<00:00, 701585.58it/s]

../cleanedGoData/train/data/3k/316088.pt


100%|██████████| 322751/322751 [00:00<00:00, 852385.90it/s]


3k 322751 322751


 37%|███▋      | 117846/321571 [00:00<00:00, 1178300.99it/s]

../cleanedGoData/train/data/2k/316088.pt


100%|██████████| 321571/321571 [00:00<00:00, 1265001.90it/s]


2k 321571 321571


 79%|███████▉  | 257529/324844 [00:00<00:00, 1303778.35it/s]

../cleanedGoData/train/data/1k/316088.pt


100%|██████████| 324844/324844 [00:00<00:00, 1289951.49it/s]


1k 324844 324844


 36%|███▌      | 114755/318545 [00:00<00:00, 1147482.70it/s]

../cleanedGoData/train/data/1d/316088.pt


100%|██████████| 318545/318545 [00:00<00:00, 1261929.88it/s]


1d 318545 318545


 80%|████████  | 260129/323455 [00:00<00:00, 1306948.55it/s]

../cleanedGoData/train/data/2d/316088.pt


100%|██████████| 323455/323455 [00:00<00:00, 1300399.03it/s]


2d 323455 323455


 81%|████████  | 256257/318112 [00:00<00:00, 1287792.58it/s]

../cleanedGoData/train/data/3d/316088.pt


100%|██████████| 318112/318112 [00:00<00:00, 1288529.80it/s]


3d 318112 318112


 38%|███▊      | 121376/317397 [00:00<00:00, 1213717.75it/s]

../cleanedGoData/train/data/4d/316088.pt


100%|██████████| 317397/317397 [00:00<00:00, 1192802.56it/s]

4d 317397 317397





In [42]:
len(fullDataPaths)

2885727

In [43]:
len(fullLabelPaths)

2885727

In [124]:
len(fullLabelPaths) / 128 * 1.5 / 60 / 60

9.393642578125

In [13]:
training_data = GoDataset(fullDataPaths, fullLabelPaths)

In [14]:
train_dataloader = DataLoader(training_data, batch_size = 128, shuffle = True, num_workers = 4)

  cpuset_checked))


In [17]:
X, y = next(iter(train_dataloader))

In [71]:
	print("getting paths")
	trainDPaths, trainLPaths = getPaths("../cleanedGoData/train/")
	valDPaths, valLPaths = getPaths("../cleanedGoData/val/")
	trainDPaths = trainDPaths[:50000]
	trainLPaths = trainLPaths[:50000]
	valDPaths = valDPaths[:50000]
	valLPaths = valLPaths[:50000]
    
	print("building dataset")
	training_data = GoDataset(trainDPaths, trainLPaths)
	val_data = GoDataset(valDPaths, valLPaths)

	print("building dataloader")
	train_loader = DataLoader(training_data, batch_size = 128, shuffle = True, num_workers = 4)
	val_loader = DataLoader(val_data, batch_size = 128, shuffle = True, num_workers = 4)

getting paths


100%|██████████| 319964/319964 [00:00<00:00, 1442355.06it/s]
100%|██████████| 319088/319088 [00:00<00:00, 1456736.70it/s]
100%|██████████| 322751/322751 [00:00<00:00, 1443997.42it/s]
100%|██████████| 321571/321571 [00:00<00:00, 1435462.74it/s]
100%|██████████| 324844/324844 [00:00<00:00, 1414486.78it/s]
100%|██████████| 318545/318545 [00:00<00:00, 1429502.57it/s]
100%|██████████| 323455/323455 [00:00<00:00, 1348695.11it/s]
100%|██████████| 318112/318112 [00:00<00:00, 1386311.66it/s]
100%|██████████| 317397/317397 [00:00<00:00, 1420740.59it/s]
100%|██████████| 32505/32505 [00:00<00:00, 1352216.25it/s]
100%|██████████| 30276/30276 [00:00<00:00, 1397670.47it/s]
100%|██████████| 32120/32120 [00:00<00:00, 1428718.86it/s]
100%|██████████| 31421/31421 [00:00<00:00, 1332132.76it/s]
100%|██████████| 33065/33065 [00:00<00:00, 1308259.47it/s]
100%|██████████| 32256/32256 [00:00<00:00, 1324296.65it/s]
100%|██████████| 31045/31045 [00:00<00:00, 1338199.54it/s]
100%|██████████| 31018/31018 [00:00<00

building dataset
building dataloader


In [72]:
X, y = next(iter(train_loader))

In [76]:
X[y[0]]

tensor([[[0., 0., 1.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 1.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.]],

        [[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 1.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.]],

        [[0., 0., 1.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 1.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.]],

        ...,

        [[0., 0., 1.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [1., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 1., 

In [44]:
X[0].shape

torch.Size([128])

In [46]:
	for t in range(1):
		print("Epoch ", t + 1, "\n-----------------------------")
		with torch.enable_grad(), \
				tq(total=len(train_loader.dataset)) as progress_bar:
			for batch, (X, y) in enumerate(train_loader):
				print(X.shape)
				print(y.shape)
				break

  0%|          | 0/50000 [00:00<?, ?it/s]

Epoch  1 
-----------------------------
torch.Size([128, 19, 19, 55])
torch.Size([128, 361])


  0%|          | 0/50000 [00:07<?, ?it/s]


In [19]:
X

tensor([[[[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.]],

         [[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.]],

         [[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.]],

         ...,

         [[0., 0., 1.,  ..., 0., 0., 0.],
          [1., 0., 0.,  ..., 0., 0., 0.],
          [0., 1., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 

In [191]:
y.shape

torch.Size([128, 361])

In [185]:
testLabel = torch.zeros((19, 19))
testLabel[5, 2] = 1
flatten = torch.nn.Flatten(0, -1)
flatten(testLabel).nonzero(as_tuple=True)[0]

tensor([97])

In [15]:
	if torch.cuda.is_available():
		device = torch.device('cuda:0')
	else:
		device = torch.device('cpu')
	print("using device: ", device)

using device:  cuda:0


In [20]:
batchSize = 0
for batch, (X, y) in tq(enumerate(train_dataloader)):
    print(X)


1it [00:05,  5.82s/it]

tensor([[[[0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.]],

         [[0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.]],

         [[0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.]],

         ...,

         [[0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          [1., 0., 0.,  ..., 1., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 1., 0., 

3it [00:06,  1.59s/it]

tensor([[[[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.]],

         [[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.]],

         [[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [1., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.]],

         ...,

         [[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 

5it [00:10,  2.02s/it]

tensor([[[[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.]],

         [[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.]],

         [[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.]],

         ...,

         [[0., 0., 1.,  ..., 0., 0., 0.],
          [1., 0., 0.,  ..., 0., 0., 0.],
          [1., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [1., 0., 0.,  ..., 0., 0., 

7it [00:11,  1.25s/it]

tensor([[[[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.]],

         [[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.]],

         [[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.]],

         ...,

         [[0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          [0., 0., 1.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 

9it [00:16,  1.75s/it]

tensor([[[[0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.]],

         [[0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          [1., 0., 0.,  ..., 1., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.]],

         [[0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          [1., 0., 0.,  ..., 1., 0., 0.],
          ...,
          [1., 0., 0.,  ..., 1., 0., 0.],
          [1., 0., 0.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.]],

         ...,

         [[0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          [0., 0., 1.,  ..., 1., 0., 0.],
          ...,
          [0., 0., 1.,  ..., 1., 0., 

12it [00:16,  1.01s/it]

tensor([[[[0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.]],

         [[0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.]],

         [[0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.],
          ...,
          [0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.]],

         ...,

         [[0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.],
          [0., 0., 1.,  ..., 0., 0., 1.],
          ...,
          [0., 1., 0.,  ..., 0., 0., 

12it [00:20,  1.68s/it]


KeyboardInterrupt: 

In [None]:
# BATCH SIZE 128
# shuffle True, num_workers 2      (2.5?)
# shuffle False, num_workers 2
# shuffle True num_workers 4        (1.3?)
# shuffle False num_workers 4

In [27]:
!ls

README.md	 __pycache__	  environment.yml  models.py  util.py
TrainTest.ipynb  buildDataset.py  layers.py	   train.py


In [29]:
pos1 = torch.load(fullDataPaths[0])

In [92]:
!ls ../cleanedGoData/train/data/

1d  1k	2d  2k	3d  3k	4d  4k	5k


In [108]:
posTotal = []
labelTotal = []
counter = 0
prefix = "../cleanedGoData/train/data/4k/"
for entry in tq(os.scandir(prefix)):
    pos = torch.load(prefix + entry.name)
    label = torch.load(prefix + entry.name)
    posTotal.append(pos)
    labelTotal.append(label) 
    counter += 1
    if counter == 50000:
        break

0it [00:00, ?it/s]

KeyboardInterrupt: 

In [100]:
torch.stack(posTotal).shape

torch.Size([50000, 19, 19, 55])

In [62]:
posTotal = torch.stack(posTotal)

In [56]:
torch.save(posTotal, "test.pt")

In [57]:
torch.load("test.pt").shape

torch.Size([100, 19, 19, 55])

In [60]:
!du -sh test.pt

7.6M	test.pt


In [37]:
len(train_dataloader.dataset)

2885727

In [18]:
!ls ../cleanedGoData/train/data/

1d  1k	2d  2k	3d  3k	4d  4k	5k


In [3]:
!ls

README.md	 __pycache__	  environment.yml  logs    models.py  train.py
TrainTest.ipynb  buildDataset.py  layers.py	   models  test.pt    util.py


In [12]:
bias = torch.zeros(19, 19)

In [13]:
counter = 1
for i in range(19):
    for j in range(19):
        if bias[i][j] != 0:
            continue
        bias[i][j] = counter
        bias[18 - i][j] = counter
        bias[i][18 - j] = counter
        bias[18 - i][18 - j] = counter
        bias[j][i] = counter
        bias[j][18 - i] = counter
        bias[18 - j][i] = counter
        bias[18 - j][18 - i] = counter
        counter += 1

In [15]:
361 / 8

45.125

In [59]:
torch.max(bias)

tensor(55.)

In [18]:
!ls

README.md	 __pycache__	  environment.yml  logs    models.py  train.py
TrainTest.ipynb  buildDataset.py  layers.py	   models  test.pt    util.py


In [23]:
!pip install tensorboard

Collecting tensorboard
  Downloading tensorboard-2.5.0-py3-none-any.whl (6.0 MB)
[K     |████████████████████████████████| 6.0 MB 6.4 MB/s eta 0:00:01
Collecting tensorboard-plugin-wit>=1.6.0
  Downloading tensorboard_plugin_wit-1.8.0-py3-none-any.whl (781 kB)
[K     |████████████████████████████████| 781 kB 52.1 MB/s eta 0:00:01
Collecting absl-py>=0.4
  Downloading absl_py-0.12.0-py3-none-any.whl (129 kB)
[K     |████████████████████████████████| 129 kB 48.7 MB/s eta 0:00:01
Collecting tensorboard-data-server<0.7.0,>=0.6.0
  Downloading tensorboard_data_server-0.6.1-py3-none-manylinux2010_x86_64.whl (4.9 MB)
[K     |████████████████████████████████| 4.9 MB 48.9 MB/s eta 0:00:01
Collecting werkzeug>=0.11.15
  Downloading Werkzeug-2.0.1-py3-none-any.whl (288 kB)
[K     |████████████████████████████████| 288 kB 47.1 MB/s eta 0:00:01
Installing collected packages: werkzeug, tensorboard-plugin-wit, tensorboard-data-server, absl-py, tensorboard
Successfully installed absl-py-0.12.0 te

In [62]:
y[ind[0][0]]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 

In [61]:
ind = torch.zeros((1, 1), dtype = torch.int32)

In [66]:
loss_fn = torch.nn.CrossEntropyLoss()

In [82]:
y.shape

torch.Size([128])

In [101]:
pred = torch.zeros((128, 361))
pred[:, 3] = 1

In [96]:
pred.shape

torch.Size([128, 361])

In [102]:
(torch.argmax(pred, dim = 1) == y).sum()

tensor(2)

In [99]:
torch.argmax(pred, dim = 1)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])

In [93]:
y

tensor([ 28, 198,   9, 119, 321,  19,  56, 302, 283, 287,  99, 271,  93, 314,
        115,  41, 315, 239,  66, 321,  93, 282, 239, 169,  40, 247, 154, 258,
        285, 264, 180,  44, 143, 108,  96, 288, 124,  60, 104,  85, 328, 240,
        338,  32,  88, 270, 167, 193,  81,  38, 211, 261,  61,  58, 274,  62,
         51, 181, 312,  86, 224, 312, 206,   3, 187, 232, 260,  24,  23, 178,
        216, 267, 196, 338,  73, 197,  50,  43, 318,  14, 272,   3, 118,  87,
        144, 179, 327,  26, 158, 185, 137,  79,  28, 235, 184, 317, 191,  49,
        265, 288, 261, 123,  70, 242, 297,  79, 109, 277, 319,  90, 108, 160,
        153, 155, 198, 128, 146, 242,  66, 129, 287, 271,  53, 169, 125,  42,
         40, 149])

In [98]:
pred

tensor([[28., 28., 28.,  ..., 28., 28., 28.],
        [28., 28., 28.,  ..., 28., 28., 28.],
        [28., 28., 28.,  ..., 28., 28., 28.],
        ...,
        [28., 28., 28.,  ..., 28., 28., 28.],
        [28., 28., 28.,  ..., 28., 28., 28.],
        [28., 28., 28.,  ..., 28., 28., 28.]])

In [115]:
with tq(total = 50000) as progress_bar:
    for i in range(500):
        progress_bar.update(100)
        progress_bar.set_postfix(epoch=i + 1)
        with tq(total=1000, leave = False) as progress_bar2:
            for j in range(10):
                progress_bar2.update(100)
                progress_bar2.set_postfix(acc = j)

  0%|          | 100/50000 [00:00<00:00, 2173214.51it/s, epoch=1]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 96243.78it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 59066.39it/s, acc=1][A
 30%|███       | 300/1000 [00:00<00:00, 54732.11it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 54476.79it/s, acc=3][A
 50%|█████     | 500/1000 [00:00<00:00, 54357.12it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 54378.50it/s, acc=5][A
 70%|███████   | 700/1000 [00:00<00:00, 53970.82it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 53664.77it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 53349.83it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 53406.81it/s, acc=9][A
  0%|          | 200/50000 [00:00<00:06, 7334.24it/s, epoch=2]   
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 135082.25it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 64877.09it/s, acc=1] [A
 30%|███ 

 70%|███████   | 700/1000 [00:00<00:00, 52919.25it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 52541.27it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 11949.28it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 12912.19it/s, acc=9][A
  5%|▍         | 2300/50000 [00:00<00:14, 3286.25it/s, epoch=23]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 132563.34it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 67841.55it/s, acc=1] [A
 30%|███       | 300/1000 [00:00<00:00, 59987.19it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 54818.55it/s, acc=3][A
 50%|█████     | 500/1000 [00:00<00:00, 53205.60it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 51893.65it/s, acc=5][A
 70%|███████   | 700/1000 [00:00<00:00, 51354.03it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 50340.46it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 49522.78it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 49320.38it/s, a

 30%|███       | 300/1000 [00:00<00:00, 61808.19it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 57663.57it/s, acc=3][A
 50%|█████     | 500/1000 [00:00<00:00, 55552.22it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 54595.56it/s, acc=5][A
 70%|███████   | 700/1000 [00:00<00:00, 53808.61it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 52706.33it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 51535.52it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 50918.43it/s, acc=9][A
  9%|▉         | 4500/50000 [00:01<00:16, 2807.76it/s, epoch=45]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 143101.47it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 69615.00it/s, acc=1] [A
 30%|███       | 300/1000 [00:00<00:00, 59688.40it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 56344.76it/s, acc=3][A
 50%|█████     | 500/1000 [00:00<00:00, 54029.42it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 53256.49it/s, ac

 13%|█▎        | 6600/50000 [00:02<00:15, 2781.97it/s, epoch=66]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 128226.96it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 69110.30it/s, acc=1] [A
 30%|███       | 300/1000 [00:00<00:00, 59116.34it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 54917.24it/s, acc=3][A
 50%|█████     | 500/1000 [00:00<00:00, 53200.20it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 52488.94it/s, acc=5][A
 70%|███████   | 700/1000 [00:00<00:00, 51572.33it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 50585.59it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 50479.05it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 50192.11it/s, acc=9][A
 13%|█▎        | 6700/50000 [00:02<00:15, 2781.97it/s, epoch=67]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 147530.92it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 74051.98it/s, acc=1] [A
 30%|███ 

 70%|███████   | 700/1000 [00:00<00:00, 53405.36it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 52659.18it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 52292.26it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 51811.59it/s, acc=9][A
 18%|█▊        | 8800/50000 [00:02<00:13, 2968.50it/s, epoch=88]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 154145.68it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 72647.51it/s, acc=1] [A
 30%|███       | 300/1000 [00:00<00:00, 62085.72it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 57330.56it/s, acc=3][A
 50%|█████     | 500/1000 [00:00<00:00, 55194.02it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 53760.49it/s, acc=5][A
 70%|███████   | 700/1000 [00:00<00:00, 53147.24it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 52521.53it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 51911.84it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 51000.78it/s, a

 30%|███       | 300/1000 [00:00<00:00, 62297.81it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 7239.83it/s, acc=3] [A
 50%|█████     | 500/1000 [00:00<00:00, 8752.58it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 10149.76it/s, acc=5][A
 70%|███████   | 700/1000 [00:00<00:00, 11467.59it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 12666.93it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 13807.10it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 14864.98it/s, acc=9][A
 22%|██▏       | 11000/50000 [00:03<00:12, 3057.85it/s, epoch=110]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 156737.82it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 70634.96it/s, acc=1] [A
 30%|███       | 300/1000 [00:00<00:00, 61862.89it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 57980.43it/s, acc=3][A
 50%|█████     | 500/1000 [00:00<00:00, 56907.41it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 55895.48it/s, a

 26%|██▌       | 13100/50000 [00:04<00:11, 3125.61it/s, epoch=131]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 166705.25it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 71132.10it/s, acc=1] [A
 30%|███       | 300/1000 [00:00<00:00, 62353.38it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 45754.38it/s, acc=3][A
 50%|█████     | 500/1000 [00:00<00:00, 47000.27it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 41796.75it/s, acc=5][A
 70%|███████   | 700/1000 [00:00<00:00, 42619.47it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 42734.70it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 43292.81it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 43719.36it/s, acc=9][A
 26%|██▋       | 13200/50000 [00:04<00:12, 2928.94it/s, epoch=132]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 167237.00it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 75839.51it/s, acc=1] [A
 30%|

 70%|███████   | 700/1000 [00:00<00:00, 56339.35it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 54783.64it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 54667.11it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 54638.95it/s, acc=9][A
 31%|███       | 15300/50000 [00:05<00:11, 3031.96it/s, epoch=153]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 157858.64it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 76343.36it/s, acc=1] [A
 30%|███       | 300/1000 [00:00<00:00, 64957.47it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 60185.16it/s, acc=3][A
 50%|█████     | 500/1000 [00:00<00:00, 57028.12it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 54904.06it/s, acc=5][A
 70%|███████   | 700/1000 [00:00<00:00, 53568.08it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 53450.20it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 52361.17it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 51943.78it/s,

 30%|███       | 300/1000 [00:00<00:00, 66862.81it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 61085.80it/s, acc=3][A
 50%|█████     | 500/1000 [00:00<00:00, 58102.51it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 56787.22it/s, acc=5][A
 70%|███████   | 700/1000 [00:00<00:00, 56192.71it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 55053.29it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 54700.39it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 54260.08it/s, acc=9][A
 35%|███▌      | 17500/50000 [00:05<00:10, 3059.58it/s, epoch=175]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 163904.03it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 79055.77it/s, acc=1] [A
 30%|███       | 300/1000 [00:00<00:00, 66362.07it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 62009.23it/s, acc=3][A
 50%|█████     | 500/1000 [00:00<00:00, 59833.15it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 58394.80it/s, 

 39%|███▉      | 19600/50000 [00:06<00:09, 3094.56it/s, epoch=196]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 120873.31it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 65968.92it/s, acc=1] [A
 30%|███       | 300/1000 [00:00<00:00, 59589.47it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 56920.16it/s, acc=3][A
 50%|█████     | 500/1000 [00:00<00:00, 55511.05it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 53794.97it/s, acc=5][A
 70%|███████   | 700/1000 [00:00<00:00, 52467.21it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 52103.16it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 52009.12it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 51983.05it/s, acc=9][A
 39%|███▉      | 19700/50000 [00:06<00:10, 2970.10it/s, epoch=197]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 129935.07it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 70321.13it/s, acc=1] [A
 30%|

 70%|███████   | 700/1000 [00:00<00:00, 46411.84it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 47075.44it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 47048.30it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 47081.52it/s, acc=9][A
 44%|████▎     | 21800/50000 [00:07<00:09, 3035.60it/s, epoch=218]
  0%|          | 0/1000 [00:00<?, ?it/s][A
 10%|█         | 100/1000 [00:00<00:00, 167906.49it/s, acc=0][A
 20%|██        | 200/1000 [00:00<00:00, 78795.87it/s, acc=1] [A
 30%|███       | 300/1000 [00:00<00:00, 67338.71it/s, acc=2][A
 40%|████      | 400/1000 [00:00<00:00, 60213.24it/s, acc=3][A
 50%|█████     | 500/1000 [00:00<00:00, 56859.58it/s, acc=4][A
 60%|██████    | 600/1000 [00:00<00:00, 55612.62it/s, acc=5][A
 70%|███████   | 700/1000 [00:00<00:00, 54838.77it/s, acc=6][A
 80%|████████  | 800/1000 [00:00<00:00, 53606.47it/s, acc=7][A
 90%|█████████ | 900/1000 [00:00<00:00, 53185.96it/s, acc=8][A
100%|██████████| 1000/1000 [00:00<00:00, 52995.86it/s,

KeyboardInterrupt: 

In [118]:
print("\ntest")


test


In [2]:
#!python train.py test

Tensorboard logs will be saved here:  logs/test
using device:  cuda:0
model will be saved here:  models/test
getting paths
100%|██████████████████████████████| 319964/319964 [00:00<00:00, 1306409.64it/s]
100%|██████████████████████████████| 319088/319088 [00:00<00:00, 1335318.23it/s]
100%|██████████████████████████████| 322751/322751 [00:00<00:00, 1320883.92it/s]
100%|██████████████████████████████| 321571/321571 [00:00<00:00, 1371354.11it/s]
100%|██████████████████████████████| 324844/324844 [00:00<00:00, 1384693.90it/s]
100%|██████████████████████████████| 318545/318545 [00:00<00:00, 1332159.35it/s]
100%|██████████████████████████████| 323455/323455 [00:00<00:00, 1352110.64it/s]
100%|██████████████████████████████| 318112/318112 [00:00<00:00, 1351866.50it/s]
100%|██████████████████████████████| 317397/317397 [00:00<00:00, 1344885.89it/s]
100%|████████████████████████████████| 32505/32505 [00:00<00:00, 1184993.32it/s]
100%|████████████████████████████████| 30276/30276 [00:00<00:00, 12

 61%|████████████████▌          | 6144/10000 [00:17<00:09, 424.19it/s, NLL=4.98][A
 63%|████████████████▉          | 6272/10000 [00:18<00:07, 519.44it/s, NLL=4.98][A
 63%|████████████████▉          | 6272/10000 [00:18<00:07, 519.44it/s, NLL=4.99][A
 64%|█████████████████▎         | 6400/10000 [00:18<00:08, 403.33it/s, NLL=4.99][A
 64%|█████████████████▎         | 6400/10000 [00:18<00:08, 403.33it/s, NLL=4.99][A
 65%|█████████████████▋         | 6528/10000 [00:18<00:08, 403.33it/s, NLL=4.99][A
 67%|█████████████████▉         | 6656/10000 [00:18<00:08, 403.33it/s, NLL=4.99][A
 68%|██████████████████▎        | 6784/10000 [00:19<00:06, 504.79it/s, NLL=4.99][A
 68%|██████████████████▎        | 6784/10000 [00:19<00:06, 504.79it/s, NLL=4.99][A
 69%|██████████████████▋        | 6912/10000 [00:19<00:07, 388.82it/s, NLL=4.99][A
 69%|██████████████████▋        | 6912/10000 [00:19<00:07, 388.82it/s, NLL=4.99][A
 70%|███████████████████        | 7040/10000 [00:20<00:07, 388.82it/s, NLL=4

 37%|█████████▋                | 3712/10000 [00:03<00:04, 1277.57it/s, NLL=4.99][A
 38%|█████████▉                | 3840/10000 [00:03<00:04, 1279.96it/s, NLL=4.99][A
 38%|█████████▉                | 3840/10000 [00:03<00:04, 1279.96it/s, NLL=4.99][A
 40%|██████████▎               | 3968/10000 [00:03<00:04, 1279.96it/s, NLL=4.99][A
 41%|██████████▋               | 4096/10000 [00:03<00:04, 1192.46it/s, NLL=4.99][A
 41%|██████████▋               | 4096/10000 [00:03<00:04, 1192.46it/s, NLL=4.99][A
 42%|██████████▉               | 4224/10000 [00:03<00:04, 1192.46it/s, NLL=4.99][A
 44%|███████████▎              | 4352/10000 [00:03<00:04, 1288.47it/s, NLL=4.99][A
 44%|████████████▌                | 4352/10000 [00:03<00:04, 1288.47it/s, NLL=5][A
 45%|████████████▉                | 4480/10000 [00:04<00:04, 1288.47it/s, NLL=5][A
 46%|█████████████▎               | 4608/10000 [00:04<00:04, 1179.68it/s, NLL=5][A
 46%|█████████████▎               | 4608/10000 [00:04<00:04, 1179.68it/s, NL

 17%|████▎                     | 1664/10000 [00:01<00:05, 1400.86it/s, NLL=5.01][A
 18%|████▋                     | 1792/10000 [00:01<00:05, 1400.86it/s, NLL=5.01][A
 19%|████▉                     | 1920/10000 [00:01<00:05, 1502.16it/s, NLL=5.01][A
 19%|████▉                     | 1920/10000 [00:01<00:05, 1502.16it/s, NLL=5.02][A
 20%|█████▎                    | 2048/10000 [00:01<00:05, 1502.16it/s, NLL=5.03][A
 22%|█████▋                    | 2176/10000 [00:01<00:05, 1562.58it/s, NLL=5.03][A
 22%|█████▋                    | 2176/10000 [00:01<00:05, 1562.58it/s, NLL=5.02][A
 23%|█████▉                    | 2304/10000 [00:02<00:04, 1562.58it/s, NLL=5.02][A
 24%|██████▎                   | 2432/10000 [00:02<00:04, 1516.33it/s, NLL=5.02][A
 24%|██████▎                   | 2432/10000 [00:02<00:04, 1516.33it/s, NLL=5.01][A
 26%|██████▋                   | 2560/10000 [00:02<00:04, 1516.33it/s, NLL=5.02][A
 27%|██████▉                   | 2688/10000 [00:02<00:04, 1598.26it/s, NLL=5

100%|████████████████████████████| 10000/10000 [00:06<00:00, 1501.87it/s, NLL=5][A
  7%|▊           | 200192/2885727 [25:02<4:06:19, 181.70it/s, NLL=4.95, epoch=1]
evaluating model...

  0%|                                                 | 0/10000 [00:00<?, ?it/s][A
  1%|▍                                     | 128/10000 [00:00<00:38, 254.94it/s][A
  1%|▎                            | 128/10000 [00:00<00:38, 254.94it/s, NLL=5.1][A
  3%|▋                            | 256/10000 [00:00<00:22, 423.67it/s, NLL=5.1][A
  3%|▋                           | 256/10000 [00:00<00:22, 423.67it/s, NLL=4.98][A
  4%|█                           | 384/10000 [00:00<00:22, 423.67it/s, NLL=4.98][A
  5%|█▍                          | 512/10000 [00:00<00:13, 699.11it/s, NLL=4.98][A
  5%|█▍                          | 512/10000 [00:00<00:13, 699.11it/s, NLL=4.96][A
  6%|█▊                          | 640/10000 [00:00<00:13, 699.11it/s, NLL=4.96][A
  8%|██▏                         | 768/10000 [00:01<00:09,

 81%|████████████████████▉     | 8064/10000 [00:05<00:01, 1523.56it/s, NLL=4.99][A
 82%|█████████████████████▎    | 8192/10000 [00:05<00:01, 1613.44it/s, NLL=4.99][A
 82%|█████████████████████▎    | 8192/10000 [00:05<00:01, 1613.44it/s, NLL=4.99][A
 83%|█████████████████████▋    | 8320/10000 [00:06<00:01, 1613.44it/s, NLL=4.99][A
 84%|█████████████████████▉    | 8448/10000 [00:06<00:01, 1526.73it/s, NLL=4.99][A
 84%|█████████████████████▉    | 8448/10000 [00:06<00:01, 1526.73it/s, NLL=4.99][A
 86%|██████████████████████▎   | 8576/10000 [00:06<00:00, 1526.73it/s, NLL=4.99][A
 87%|██████████████████████▋   | 8704/10000 [00:06<00:00, 1432.04it/s, NLL=4.99][A
 87%|██████████████████████▋   | 8704/10000 [00:06<00:00, 1432.04it/s, NLL=4.99][A
 88%|██████████████████████▉   | 8832/10000 [00:06<00:00, 1432.04it/s, NLL=4.99][A
 90%|███████████████████████▎  | 8960/10000 [00:06<00:00, 1238.53it/s, NLL=4.99][A
 90%|███████████████████████▎  | 8960/10000 [00:06<00:00, 1238.53it/s, NLL=4

 60%|███████████████▋          | 6016/10000 [00:04<00:03, 1118.83it/s, NLL=4.99][A
 61%|███████████████▉          | 6144/10000 [00:05<00:03, 1270.24it/s, NLL=4.99][A
 61%|███████████████▉          | 6144/10000 [00:05<00:03, 1270.24it/s, NLL=4.99][A
 63%|████████████████▎         | 6272/10000 [00:05<00:02, 1270.24it/s, NLL=4.99][A
 64%|████████████████▋         | 6400/10000 [00:05<00:02, 1319.43it/s, NLL=4.99][A
 64%|████████████████▋         | 6400/10000 [00:05<00:02, 1319.43it/s, NLL=4.99][A
 65%|████████████████▉         | 6528/10000 [00:05<00:02, 1319.43it/s, NLL=4.99][A
 67%|█████████████████▎        | 6656/10000 [00:05<00:02, 1488.39it/s, NLL=4.99][A
 67%|█████████████████▎        | 6656/10000 [00:05<00:02, 1488.39it/s, NLL=4.99][A
 68%|█████████████████▋        | 6784/10000 [00:05<00:02, 1488.39it/s, NLL=4.99][A
 69%|█████████████████▉        | 6912/10000 [00:05<00:02, 1374.44it/s, NLL=4.99][A
 69%|████████████████████         | 6912/10000 [00:05<00:02, 1374.44it/s, NL

 41%|███████████▉                 | 4096/10000 [00:03<00:04, 1438.88it/s, NLL=5][A
 42%|████████████▏                | 4224/10000 [00:03<00:04, 1438.88it/s, NLL=5][A
 44%|████████████▌                | 4352/10000 [00:03<00:03, 1432.78it/s, NLL=5][A
 44%|███████████▎              | 4352/10000 [00:03<00:03, 1432.78it/s, NLL=5.01][A
 45%|████████████▉                | 4480/10000 [00:03<00:03, 1432.78it/s, NLL=5][A
 46%|█████████████▎               | 4608/10000 [00:03<00:03, 1437.73it/s, NLL=5][A
 46%|█████████████▎               | 4608/10000 [00:03<00:03, 1437.73it/s, NLL=5][A
 47%|█████████████▋               | 4736/10000 [00:03<00:03, 1437.73it/s, NLL=5][A
 49%|██████████████               | 4864/10000 [00:04<00:03, 1473.01it/s, NLL=5][A
 49%|██████████████               | 4864/10000 [00:04<00:03, 1473.01it/s, NLL=5][A
 50%|██████████████▍              | 4992/10000 [00:04<00:03, 1473.01it/s, NLL=5][A
 51%|██████████████▊              | 5120/10000 [00:04<00:03, 1463.20it/s, NL

 23%|█████▉                    | 2304/10000 [00:02<00:05, 1342.21it/s, NLL=5.01][A
 24%|██████▎                   | 2432/10000 [00:02<00:05, 1342.21it/s, NLL=5.01][A
 26%|██████▋                   | 2560/10000 [00:02<00:05, 1371.74it/s, NLL=5.01][A
 26%|██████▋                   | 2560/10000 [00:02<00:05, 1371.74it/s, NLL=5.01][A
 27%|██████▉                   | 2688/10000 [00:02<00:05, 1371.74it/s, NLL=5.01][A
 28%|███████▎                  | 2816/10000 [00:02<00:05, 1429.21it/s, NLL=5.01][A
 28%|███████▎                  | 2816/10000 [00:02<00:05, 1429.21it/s, NLL=5.01][A
 29%|███████▋                  | 2944/10000 [00:02<00:04, 1429.21it/s, NLL=5.01][A
 31%|███████▉                  | 3072/10000 [00:02<00:04, 1442.22it/s, NLL=5.01][A
 31%|███████▉                  | 3072/10000 [00:02<00:04, 1442.22it/s, NLL=5.01][A
 32%|████████▎                 | 3200/10000 [00:02<00:04, 1442.22it/s, NLL=5.01][A
 33%|████████▋                 | 3328/10000 [00:02<00:04, 1454.15it/s, NLL=5

  5%|█▍                          | 512/10000 [00:00<00:13, 694.57it/s, NLL=4.93][A
  6%|█▊                          | 640/10000 [00:01<00:13, 694.57it/s, NLL=4.95][A
  8%|██▏                         | 768/10000 [00:01<00:09, 945.74it/s, NLL=4.95][A
  8%|██▏                         | 768/10000 [00:01<00:09, 945.74it/s, NLL=4.95][A
  9%|██▌                         | 896/10000 [00:01<00:09, 945.74it/s, NLL=4.97][A
 10%|██▋                       | 1024/10000 [00:01<00:08, 1110.56it/s, NLL=4.97][A
 10%|██▋                       | 1024/10000 [00:01<00:08, 1110.56it/s, NLL=4.96][A
 12%|██▉                       | 1152/10000 [00:01<00:07, 1110.56it/s, NLL=4.96][A
 13%|███▎                      | 1280/10000 [00:01<00:07, 1216.26it/s, NLL=4.96][A
 13%|███▎                      | 1280/10000 [00:01<00:07, 1216.26it/s, NLL=4.98][A
 14%|███▋                      | 1408/10000 [00:01<00:07, 1216.26it/s, NLL=4.98][A
 15%|███▉                      | 1536/10000 [00:01<00:06, 1323.31it/s, NLL=4

 88%|██████████████████████▉   | 8832/10000 [00:06<00:00, 1601.44it/s, NLL=4.99][A
 90%|███████████████████████▎  | 8960/10000 [00:06<00:00, 1517.19it/s, NLL=4.99][A
 90%|█████████████████████████▉   | 8960/10000 [00:06<00:00, 1517.19it/s, NLL=5][A
 91%|██████████████████████████▎  | 9088/10000 [00:06<00:00, 1517.19it/s, NLL=5][A
 92%|██████████████████████████▋  | 9216/10000 [00:06<00:00, 1517.19it/s, NLL=5][A
 93%|███████████████████████████  | 9344/10000 [00:06<00:00, 1517.19it/s, NLL=5][A
 95%|███████████████████████████▍ | 9472/10000 [00:06<00:00, 2107.31it/s, NLL=5][A
 95%|███████████████████████████▍ | 9472/10000 [00:06<00:00, 2107.31it/s, NLL=5][A
 96%|███████████████████████████▊ | 9600/10000 [00:06<00:00, 2107.31it/s, NLL=5][A
 97%|████████████████████████████▏| 9728/10000 [00:06<00:00, 2107.31it/s, NLL=5][A
 99%|████████████████████████████▌| 9856/10000 [00:06<00:00, 2107.31it/s, NLL=5][A
100%|████████████████████████████▉| 9984/10000 [00:06<00:00, 2643.37it/s, NL

In [20]:
# !python train.py testConvNoRes

Tensorboard logs will be saved here:  logs/testConvNoRes
using device:  cuda:0
model will be saved here:  models/testConvNoRes
getting paths
100%|██████████████████████████████| 319964/319964 [00:00<00:00, 1326483.87it/s]
100%|██████████████████████████████| 319088/319088 [00:00<00:00, 1309444.77it/s]
100%|██████████████████████████████| 322751/322751 [00:00<00:00, 1346939.36it/s]
100%|██████████████████████████████| 321571/321571 [00:00<00:00, 1357875.16it/s]
100%|██████████████████████████████| 324844/324844 [00:00<00:00, 1355725.35it/s]
100%|██████████████████████████████| 318545/318545 [00:00<00:00, 1365047.71it/s]
100%|██████████████████████████████| 323455/323455 [00:00<00:00, 1365506.16it/s]
100%|██████████████████████████████| 318112/318112 [00:00<00:00, 1344671.57it/s]
100%|██████████████████████████████| 317397/317397 [00:00<00:00, 1368372.51it/s]
100%|████████████████████████████████| 32505/32505 [00:00<00:00, 1154791.60it/s]
100%|████████████████████████████████| 30276/3027

 58%|███████████████▌           | 5760/10000 [00:19<00:09, 449.89it/s, NLL=3.09][A
 59%|███████████████▉           | 5888/10000 [00:19<00:09, 449.89it/s, NLL=3.08][A
 60%|████████████████▏          | 6016/10000 [00:19<00:09, 411.19it/s, NLL=3.08][A
 60%|████████████████▏          | 6016/10000 [00:19<00:09, 411.19it/s, NLL=3.09][A
 61%|████████████████▌          | 6144/10000 [00:19<00:09, 411.19it/s, NLL=3.08][A
 63%|████████████████▉          | 6272/10000 [00:20<00:08, 459.24it/s, NLL=3.08][A
 63%|████████████████▉          | 6272/10000 [00:20<00:08, 459.24it/s, NLL=3.08][A
 64%|█████████████████▎         | 6400/10000 [00:20<00:08, 432.35it/s, NLL=3.08][A
 64%|█████████████████▎         | 6400/10000 [00:20<00:08, 432.35it/s, NLL=3.08][A
 65%|█████████████████▋         | 6528/10000 [00:20<00:07, 495.50it/s, NLL=3.08][A
 65%|█████████████████▋         | 6528/10000 [00:20<00:07, 495.50it/s, NLL=3.09][A
 67%|█████████████████▉         | 6656/10000 [00:20<00:06, 495.50it/s, NLL=3

 28%|███████▌                   | 2816/10000 [00:08<00:18, 389.81it/s, NLL=2.53][A
 29%|███████▉                   | 2944/10000 [00:08<00:18, 389.81it/s, NLL=2.52][A
 31%|████████▎                  | 3072/10000 [00:08<00:12, 554.96it/s, NLL=2.52][A
 31%|████████▎                  | 3072/10000 [00:08<00:12, 554.96it/s, NLL=2.51][A
 32%|████████▋                  | 3200/10000 [00:09<00:15, 431.96it/s, NLL=2.51][A
 32%|████████▋                  | 3200/10000 [00:09<00:15, 431.96it/s, NLL=2.51][A
 33%|████████▉                  | 3328/10000 [00:09<00:15, 419.22it/s, NLL=2.51][A
 33%|████████▉                  | 3328/10000 [00:09<00:15, 419.22it/s, NLL=2.51][A
 35%|█████████▎                 | 3456/10000 [00:09<00:15, 419.22it/s, NLL=2.51][A
 36%|█████████▋                 | 3584/10000 [00:09<00:10, 592.68it/s, NLL=2.51][A
 36%|██████████                  | 3584/10000 [00:09<00:10, 592.68it/s, NLL=2.5][A
 37%|██████████▍                 | 3712/10000 [00:10<00:13, 474.14it/s, NLL=

100%|██████████████████████████▉| 9984/10000 [00:22<00:00, 401.80it/s, NLL=2.55][A
100%|██████████████████████████| 10000/10000 [00:22<00:00, 444.44it/s, NLL=2.55][A
new best acc of  tensor(0.3826, device='cuda:0') Saving model at:  models/testConvNoRes100096
  5%|▌           | 150144/2885727 [19:46<5:08:04, 147.99it/s, NLL=2.73, epoch=1]
evaluating model...

  0%|                                                 | 0/10000 [00:00<?, ?it/s][A
  1%|▍                                     | 128/10000 [00:00<01:10, 139.99it/s][A
  1%|▎                           | 128/10000 [00:00<01:10, 139.99it/s, NLL=2.63][A
  3%|▋                           | 256/10000 [00:01<00:33, 290.13it/s, NLL=2.63][A
  3%|▋                           | 256/10000 [00:01<00:33, 290.13it/s, NLL=2.41][A
  4%|█                           | 384/10000 [00:01<00:22, 426.78it/s, NLL=2.41][A
  4%|█                           | 384/10000 [00:01<00:22, 426.78it/s, NLL=2.42][A
  5%|█▍                          | 512/10000 [00

 73%|██████████████████▉       | 7296/10000 [00:06<00:02, 1275.70it/s, NLL=2.51][A
 74%|███████████████████▎      | 7424/10000 [00:06<00:02, 1249.35it/s, NLL=2.51][A
 74%|████████████████████       | 7424/10000 [00:06<00:02, 1249.35it/s, NLL=2.5][A
 76%|████████████████████▍      | 7552/10000 [00:06<00:01, 1249.35it/s, NLL=2.5][A
 77%|████████████████████▋      | 7680/10000 [00:06<00:01, 1277.95it/s, NLL=2.5][A
 77%|████████████████████▋      | 7680/10000 [00:06<00:01, 1277.95it/s, NLL=2.5][A
 78%|█████████████████████      | 7808/10000 [00:07<00:01, 1267.82it/s, NLL=2.5][A
 78%|█████████████████████      | 7808/10000 [00:07<00:01, 1267.82it/s, NLL=2.5][A
 79%|█████████████████████▍     | 7936/10000 [00:07<00:01, 1261.51it/s, NLL=2.5][A
 79%|█████████████████████▍     | 7936/10000 [00:07<00:01, 1261.51it/s, NLL=2.5][A
 81%|█████████████████████▊     | 8064/10000 [00:07<00:01, 1233.59it/s, NLL=2.5][A
 81%|█████████████████████▊     | 8064/10000 [00:07<00:01, 1233.59it/s, NLL=

 44%|███████████▎              | 4352/10000 [00:04<00:04, 1271.33it/s, NLL=2.47][A
 45%|███████████▋              | 4480/10000 [00:04<00:04, 1265.09it/s, NLL=2.47][A
 45%|███████████▋              | 4480/10000 [00:04<00:04, 1265.09it/s, NLL=2.48][A
 46%|███████████▉              | 4608/10000 [00:04<00:04, 1202.98it/s, NLL=2.48][A
 46%|███████████▉              | 4608/10000 [00:04<00:04, 1202.98it/s, NLL=2.47][A
 47%|████████████▎             | 4736/10000 [00:04<00:04, 1202.98it/s, NLL=2.47][A
 49%|████████████▋             | 4864/10000 [00:04<00:04, 1269.74it/s, NLL=2.47][A
 49%|████████████▋             | 4864/10000 [00:04<00:04, 1269.74it/s, NLL=2.47][A
 50%|████████████▉             | 4992/10000 [00:04<00:03, 1269.74it/s, NLL=2.47][A
 51%|█████████████▎            | 5120/10000 [00:04<00:03, 1293.17it/s, NLL=2.47][A
 51%|█████████████▎            | 5120/10000 [00:04<00:03, 1293.17it/s, NLL=2.47][A
 52%|█████████████▋            | 5248/10000 [00:04<00:03, 1276.95it/s, NLL=2

 15%|███▉                      | 1536/10000 [00:01<00:07, 1118.64it/s, NLL=2.43][A
 17%|████▎                     | 1664/10000 [00:01<00:07, 1118.64it/s, NLL=2.42][A
 18%|████▋                     | 1792/10000 [00:02<00:06, 1192.77it/s, NLL=2.42][A
 18%|████▋                     | 1792/10000 [00:02<00:06, 1192.77it/s, NLL=2.43][A
 19%|████▉                     | 1920/10000 [00:02<00:06, 1192.77it/s, NLL=2.41][A
 20%|█████▎                    | 2048/10000 [00:02<00:06, 1223.83it/s, NLL=2.41][A
 20%|█████▎                    | 2048/10000 [00:02<00:06, 1223.83it/s, NLL=2.41][A
 22%|█████▉                     | 2176/10000 [00:02<00:06, 1223.83it/s, NLL=2.4][A
 23%|██████▏                    | 2304/10000 [00:02<00:06, 1258.54it/s, NLL=2.4][A
 23%|██████▏                    | 2304/10000 [00:02<00:06, 1258.54it/s, NLL=2.4][A
 24%|██████▎                   | 2432/10000 [00:02<00:06, 1258.54it/s, NLL=2.38][A
 26%|██████▋                   | 2560/10000 [00:02<00:06, 1239.69it/s, NLL=2

 95%|████████████████████████▋ | 9472/10000 [00:08<00:00, 1318.21it/s, NLL=2.39][A
 96%|████████████████████████▉ | 9600/10000 [00:08<00:00, 1318.21it/s, NLL=2.39][A
 97%|█████████████████████████▎| 9728/10000 [00:08<00:00, 1325.43it/s, NLL=2.39][A
 97%|█████████████████████████▎| 9728/10000 [00:08<00:00, 1325.43it/s, NLL=2.39][A
 99%|█████████████████████████▋| 9856/10000 [00:08<00:00, 1325.43it/s, NLL=2.38][A
100%|█████████████████████████▉| 9984/10000 [00:08<00:00, 1320.17it/s, NLL=2.38][A
100%|█████████████████████████▉| 9984/10000 [00:08<00:00, 1320.17it/s, NLL=2.38][A
100%|█████████████████████████| 10000/10000 [00:08<00:00, 1164.89it/s, NLL=2.38][A
new best acc of  tensor(0.4124, device='cuda:0') Saving model at:  models/testConvNoRes250240
 10%|█▏          | 300288/2885727 [39:53<4:43:19, 152.09it/s, NLL=2.65, epoch=1]
evaluating model...

  0%|                                                 | 0/10000 [00:00<?, ?it/s][A
  1%|▍                                     | 128

 65%|████████████████▉         | 6528/10000 [00:05<00:02, 1265.44it/s, NLL=2.33][A
 67%|█████████████████▎        | 6656/10000 [00:05<00:02, 1245.50it/s, NLL=2.33][A
 67%|█████████████████▎        | 6656/10000 [00:05<00:02, 1245.50it/s, NLL=2.33][A
 68%|█████████████████▋        | 6784/10000 [00:06<00:02, 1244.82it/s, NLL=2.33][A
 68%|█████████████████▋        | 6784/10000 [00:06<00:02, 1244.82it/s, NLL=2.32][A
 69%|█████████████████▉        | 6912/10000 [00:06<00:02, 1244.82it/s, NLL=2.32][A
 70%|██████████████████▎       | 7040/10000 [00:06<00:02, 1282.48it/s, NLL=2.32][A
 70%|██████████████████▎       | 7040/10000 [00:06<00:02, 1282.48it/s, NLL=2.32][A
 72%|██████████████████▋       | 7168/10000 [00:06<00:02, 1282.48it/s, NLL=2.32][A
 73%|██████████████████▉       | 7296/10000 [00:06<00:02, 1300.84it/s, NLL=2.32][A
 73%|██████████████████▉       | 7296/10000 [00:06<00:02, 1300.84it/s, NLL=2.32][A
 74%|███████████████████▎      | 7424/10000 [00:06<00:01, 1300.84it/s, NLL=2

 42%|███████████▍               | 4224/10000 [00:07<00:07, 781.64it/s, NLL=2.37][A
 44%|███████████▊               | 4352/10000 [00:07<00:07, 781.64it/s, NLL=2.37][A
 45%|████████████               | 4480/10000 [00:07<00:06, 895.15it/s, NLL=2.37][A
 45%|████████████               | 4480/10000 [00:07<00:06, 895.15it/s, NLL=2.37][A
 46%|████████████▍              | 4608/10000 [00:07<00:06, 827.04it/s, NLL=2.37][A
 46%|████████████▍              | 4608/10000 [00:07<00:06, 827.04it/s, NLL=2.36][A
 47%|████████████▊              | 4736/10000 [00:07<00:07, 744.83it/s, NLL=2.36][A
 47%|████████████▊              | 4736/10000 [00:07<00:07, 744.83it/s, NLL=2.36][A
 49%|█████████████▏             | 4864/10000 [00:07<00:06, 744.83it/s, NLL=2.35][A
 50%|█████████████▍             | 4992/10000 [00:08<00:05, 896.62it/s, NLL=2.35][A
 50%|█████████████▍             | 4992/10000 [00:08<00:05, 896.62it/s, NLL=2.36][A
 51%|█████████████▊             | 5120/10000 [00:08<00:05, 856.40it/s, NLL=2

 13%|███▌                        | 1280/10000 [00:02<00:14, 602.98it/s, NLL=2.3][A
 14%|███▊                       | 1408/10000 [00:02<00:14, 602.98it/s, NLL=2.31][A
 15%|████▏                      | 1536/10000 [00:02<00:10, 775.38it/s, NLL=2.31][A
 15%|████▏                      | 1536/10000 [00:02<00:10, 775.38it/s, NLL=2.34][A
 17%|████▍                      | 1664/10000 [00:02<00:10, 829.86it/s, NLL=2.34][A
 17%|████▍                      | 1664/10000 [00:02<00:10, 829.86it/s, NLL=2.33][A
 18%|████▊                      | 1792/10000 [00:03<00:15, 513.59it/s, NLL=2.33][A
 18%|████▊                      | 1792/10000 [00:03<00:15, 513.59it/s, NLL=2.33][A
 19%|█████▏                     | 1920/10000 [00:03<00:15, 513.59it/s, NLL=2.32][A
 20%|█████▌                     | 2048/10000 [00:03<00:11, 688.19it/s, NLL=2.32][A
 20%|█████▌                     | 2048/10000 [00:03<00:11, 688.19it/s, NLL=2.31][A
 22%|█████▉                     | 2176/10000 [00:03<00:11, 688.19it/s, NLL=2

 95%|████████████████████████▋ | 9472/10000 [00:12<00:00, 1035.67it/s, NLL=2.32][A
 96%|████████████████████████▉ | 9600/10000 [00:12<00:00, 1035.67it/s, NLL=2.32][A
 97%|██████████████████████████▎| 9728/10000 [00:12<00:00, 871.51it/s, NLL=2.32][A
 97%|██████████████████████████▎| 9728/10000 [00:12<00:00, 871.51it/s, NLL=2.32][A
 99%|██████████████████████████▌| 9856/10000 [00:12<00:00, 871.51it/s, NLL=2.32][A
100%|██████████████████████████▉| 9984/10000 [00:12<00:00, 984.28it/s, NLL=2.32][A
100%|██████████████████████████▉| 9984/10000 [00:12<00:00, 984.28it/s, NLL=2.32][A
100%|██████████████████████████| 10000/10000 [00:12<00:00, 778.77it/s, NLL=2.32][A
new best acc of  tensor(0.4192, device='cuda:0') Saving model at:  models/testConvNoRes400384
 16%|█▊          | 450432/2885727 [59:20<4:36:19, 146.89it/s, NLL=2.03, epoch=1]
evaluating model...

  0%|                                                 | 0/10000 [00:00<?, ?it/s][A
  1%|▍                                      | 12

 67%|█████████████████▉         | 6656/10000 [00:13<00:04, 744.47it/s, NLL=2.27][A
 68%|██████████████████▎        | 6784/10000 [00:13<00:04, 743.81it/s, NLL=2.27][A
 68%|██████████████████▎        | 6784/10000 [00:13<00:04, 743.81it/s, NLL=2.27][A
 69%|██████████████████▋        | 6912/10000 [00:14<00:04, 646.99it/s, NLL=2.27][A
 69%|██████████████████▋        | 6912/10000 [00:14<00:04, 646.99it/s, NLL=2.27][A
 70%|███████████████████        | 7040/10000 [00:14<00:04, 646.99it/s, NLL=2.27][A
 72%|███████████████████▎       | 7168/10000 [00:14<00:03, 823.65it/s, NLL=2.27][A
 72%|███████████████████▎       | 7168/10000 [00:14<00:03, 823.65it/s, NLL=2.28][A
 73%|███████████████████▋       | 7296/10000 [00:14<00:04, 624.25it/s, NLL=2.28][A
 73%|███████████████████▋       | 7296/10000 [00:14<00:04, 624.25it/s, NLL=2.28][A
 74%|████████████████████       | 7424/10000 [00:14<00:04, 624.25it/s, NLL=2.28][A
 76%|████████████████████▍      | 7552/10000 [00:14<00:03, 792.46it/s, NLL=2

 38%|██████████▎                | 3840/10000 [00:07<00:09, 625.88it/s, NLL=2.29][A
 40%|██████████▋                | 3968/10000 [00:07<00:07, 758.19it/s, NLL=2.29][A
 40%|██████████▋                | 3968/10000 [00:07<00:07, 758.19it/s, NLL=2.28][A
 41%|███████████                | 4096/10000 [00:07<00:07, 758.19it/s, NLL=2.28][A
 42%|███████████▍               | 4224/10000 [00:08<00:08, 682.25it/s, NLL=2.28][A
 42%|███████████▍               | 4224/10000 [00:08<00:08, 682.25it/s, NLL=2.28][A
 44%|███████████▊               | 4352/10000 [00:08<00:08, 682.25it/s, NLL=2.29][A
 45%|████████████               | 4480/10000 [00:08<00:06, 809.13it/s, NLL=2.29][A
 45%|████████████▌               | 4480/10000 [00:08<00:06, 809.13it/s, NLL=2.3][A
 46%|████████████▉               | 4608/10000 [00:08<00:06, 809.13it/s, NLL=2.3][A
 47%|█████████████▎              | 4736/10000 [00:08<00:07, 694.04it/s, NLL=2.3][A
 47%|█████████████▎              | 4736/10000 [00:08<00:07, 694.04it/s, NLL=

 15%|████▏                      | 1536/10000 [00:04<00:16, 514.99it/s, NLL=2.25][A
 17%|████▍                      | 1664/10000 [00:04<00:14, 575.40it/s, NLL=2.25][A
 17%|████▍                      | 1664/10000 [00:04<00:14, 575.40it/s, NLL=2.24][A
 18%|████▊                      | 1792/10000 [00:05<00:36, 222.63it/s, NLL=2.24][A
 18%|████▊                      | 1792/10000 [00:05<00:36, 222.63it/s, NLL=2.25][A
 19%|█████▏                     | 1920/10000 [00:06<00:36, 222.63it/s, NLL=2.25][A
 20%|█████▌                     | 2048/10000 [00:06<00:23, 343.71it/s, NLL=2.25][A
 20%|█████▌                     | 2048/10000 [00:06<00:23, 343.71it/s, NLL=2.24][A
 22%|█████▉                     | 2176/10000 [00:06<00:22, 343.71it/s, NLL=2.24][A
 23%|██████▏                    | 2304/10000 [00:06<00:21, 359.01it/s, NLL=2.24][A
 23%|██████▏                    | 2304/10000 [00:06<00:21, 359.01it/s, NLL=2.25][A
 24%|██████▌                    | 2432/10000 [00:06<00:21, 359.01it/s, NLL=2

 97%|██████████████████████████▎| 9728/10000 [00:17<00:00, 765.89it/s, NLL=2.24][A
 99%|██████████████████████████▌| 9856/10000 [00:17<00:00, 765.89it/s, NLL=2.24][A
100%|██████████████████████████▉| 9984/10000 [00:17<00:00, 554.71it/s, NLL=2.24][A
100%|██████████████████████████▉| 9984/10000 [00:17<00:00, 554.71it/s, NLL=2.25][A
100%|██████████████████████████| 10000/10000 [00:17<00:00, 557.80it/s, NLL=2.25][A
new best acc of  tensor(0.4295, device='cuda:0') Saving model at:  models/testConvNoRes550528
 19%|█▉        | 552704/2885727 [1:13:05<6:25:13, 100.94it/s, NLL=2.37, epoch=1]^C


In [21]:
!python train.py testConvWRes

Tensorboard logs will be saved here:  logs/testConvWRes
using device:  cuda:0
model will be saved here:  models/testConvWRes
getting paths
100%|██████████████████████████████| 319964/319964 [00:00<00:00, 1349741.41it/s]
100%|███████████████████████████████| 319088/319088 [00:00<00:00, 952022.36it/s]
100%|██████████████████████████████| 322751/322751 [00:00<00:00, 1321544.14it/s]
100%|██████████████████████████████| 321571/321571 [00:00<00:00, 1343937.76it/s]
100%|██████████████████████████████| 324844/324844 [00:00<00:00, 1357726.15it/s]
100%|██████████████████████████████| 318545/318545 [00:00<00:00, 1340699.98it/s]
100%|██████████████████████████████| 323455/323455 [00:00<00:00, 1258864.87it/s]
100%|██████████████████████████████| 318112/318112 [00:00<00:00, 1337109.15it/s]
100%|██████████████████████████████| 317397/317397 [00:00<00:00, 1317052.46it/s]
100%|████████████████████████████████| 32505/32505 [00:00<00:00, 1164676.37it/s]
100%|████████████████████████████████| 30276/30276 

 51%|█████████████▊             | 5120/10000 [00:10<00:07, 615.74it/s, NLL=2.51][A
 52%|██████████████▋             | 5248/10000 [00:10<00:07, 615.74it/s, NLL=2.5][A
 54%|███████████████             | 5376/10000 [00:10<00:05, 786.25it/s, NLL=2.5][A
 54%|███████████████             | 5376/10000 [00:10<00:05, 786.25it/s, NLL=2.5][A
 55%|███████████████▍            | 5504/10000 [00:11<00:06, 723.74it/s, NLL=2.5][A
  2%|▏            | 50048/2885727 [06:11<5:56:44, 132.48it/s, NLL=2.64, epoch=1][A
 56%|███████████████▏           | 5632/10000 [00:11<00:08, 509.11it/s, NLL=2.51][A
 56%|███████████████▏           | 5632/10000 [00:11<00:08, 509.11it/s, NLL=2.51][A
 58%|███████████████▌           | 5760/10000 [00:11<00:08, 509.11it/s, NLL=2.51][A
 59%|███████████████▉           | 5888/10000 [00:11<00:06, 683.28it/s, NLL=2.51][A
 59%|███████████████▉           | 5888/10000 [00:11<00:06, 683.28it/s, NLL=2.51][A
 60%|████████████████▏          | 6016/10000 [00:12<00:05, 694.52it/s, NLL=2

 23%|██████▍                     | 2304/10000 [00:05<00:14, 540.26it/s, NLL=2.4][A
 23%|██████▏                    | 2304/10000 [00:05<00:14, 540.26it/s, NLL=2.39][A
 24%|██████▊                     | 2432/10000 [00:05<00:14, 540.26it/s, NLL=2.4][A
 26%|███████▏                    | 2560/10000 [00:06<00:10, 678.36it/s, NLL=2.4][A
 26%|██████▉                    | 2560/10000 [00:06<00:10, 678.36it/s, NLL=2.39][A
 27%|███████▎                   | 2688/10000 [00:06<00:14, 499.63it/s, NLL=2.39][A
 27%|███████▎                   | 2688/10000 [00:06<00:14, 499.63it/s, NLL=2.39][A
 28%|███████▌                   | 2816/10000 [00:06<00:12, 574.51it/s, NLL=2.39][A
 28%|███████▉                    | 2816/10000 [00:06<00:12, 574.51it/s, NLL=2.4][A
 29%|████████▏                   | 2944/10000 [00:06<00:10, 659.39it/s, NLL=2.4][A
 29%|███████▉                   | 2944/10000 [00:06<00:10, 659.39it/s, NLL=2.41][A
 31%|████████▎                  | 3072/10000 [00:06<00:09, 749.82it/s, NLL=2

 93%|█████████████████████████▏ | 9344/10000 [00:15<00:00, 722.08it/s, NLL=2.44][A
 95%|█████████████████████████▌ | 9472/10000 [00:15<00:00, 658.48it/s, NLL=2.44][A
 95%|█████████████████████████▌ | 9472/10000 [00:15<00:00, 658.48it/s, NLL=2.44][A
 96%|█████████████████████████▉ | 9600/10000 [00:16<00:00, 694.00it/s, NLL=2.44][A
 96%|█████████████████████████▉ | 9600/10000 [00:16<00:00, 694.00it/s, NLL=2.43][A
 97%|██████████████████████████▎| 9728/10000 [00:16<00:00, 694.00it/s, NLL=2.43][A
 99%|██████████████████████████▌| 9856/10000 [00:16<00:00, 851.04it/s, NLL=2.43][A
 99%|██████████████████████████▌| 9856/10000 [00:16<00:00, 851.04it/s, NLL=2.43][A
100%|██████████████████████████▉| 9984/10000 [00:16<00:00, 636.63it/s, NLL=2.43][A
100%|██████████████████████████▉| 9984/10000 [00:16<00:00, 636.63it/s, NLL=2.43][A
100%|██████████████████████████| 10000/10000 [00:16<00:00, 595.58it/s, NLL=2.43][A
new best acc of  tensor(0.4058, device='cuda:0') Saving model at:  models/te

 59%|███████████████▎          | 5888/10000 [00:07<00:03, 1058.49it/s, NLL=2.32][A
 60%|███████████████▋          | 6016/10000 [00:08<00:03, 1110.42it/s, NLL=2.32][A
 60%|███████████████▋          | 6016/10000 [00:08<00:03, 1110.42it/s, NLL=2.32][A
 61%|███████████████▉          | 6144/10000 [00:08<00:03, 1155.69it/s, NLL=2.32][A
 61%|███████████████▉          | 6144/10000 [00:08<00:03, 1155.69it/s, NLL=2.32][A
 63%|████████████████▉          | 6272/10000 [00:08<00:04, 853.01it/s, NLL=2.32][A
 63%|████████████████▉          | 6272/10000 [00:08<00:04, 853.01it/s, NLL=2.32][A
 64%|█████████████████▎         | 6400/10000 [00:08<00:03, 939.61it/s, NLL=2.32][A
 64%|█████████████████▎         | 6400/10000 [00:08<00:03, 939.61it/s, NLL=2.33][A
 65%|████████████████▉         | 6528/10000 [00:08<00:03, 1007.74it/s, NLL=2.33][A
 65%|████████████████▉         | 6528/10000 [00:08<00:03, 1007.74it/s, NLL=2.32][A
 67%|█████████████████▎        | 6656/10000 [00:08<00:03, 1046.73it/s, NLL=2

 20%|█████▎                    | 2048/10000 [00:02<00:06, 1144.82it/s, NLL=2.31][A
 22%|█████▋                    | 2176/10000 [00:02<00:06, 1160.06it/s, NLL=2.31][A
 22%|█████▉                     | 2176/10000 [00:02<00:06, 1160.06it/s, NLL=2.3][A
 23%|██████▏                    | 2304/10000 [00:02<00:06, 1152.58it/s, NLL=2.3][A
 23%|█████▉                    | 2304/10000 [00:02<00:06, 1152.58it/s, NLL=2.29][A
 24%|██████▎                   | 2432/10000 [00:02<00:06, 1164.02it/s, NLL=2.29][A
 24%|██████▎                   | 2432/10000 [00:02<00:06, 1164.02it/s, NLL=2.28][A
 26%|██████▋                   | 2560/10000 [00:02<00:06, 1107.58it/s, NLL=2.28][A
 26%|██████▋                   | 2560/10000 [00:02<00:06, 1107.58it/s, NLL=2.28][A
 27%|██████▉                   | 2688/10000 [00:02<00:06, 1062.34it/s, NLL=2.28][A
 27%|██████▉                   | 2688/10000 [00:02<00:06, 1062.34it/s, NLL=2.29][A
 28%|███████▎                  | 2816/10000 [00:03<00:06, 1109.32it/s, NLL=2

 83%|██████████████████████▍    | 8320/10000 [00:07<00:01, 1104.19it/s, NLL=2.3][A
 84%|██████████████████████▊    | 8448/10000 [00:07<00:01, 1150.59it/s, NLL=2.3][A
 84%|██████████████████████▊    | 8448/10000 [00:07<00:01, 1150.59it/s, NLL=2.3][A
 86%|███████████████████████▏   | 8576/10000 [00:07<00:01, 1173.42it/s, NLL=2.3][A
 86%|██████████████████████▎   | 8576/10000 [00:07<00:01, 1173.42it/s, NLL=2.29][A
 87%|██████████████████████▋   | 8704/10000 [00:08<00:01, 1192.23it/s, NLL=2.29][A
 87%|██████████████████████▋   | 8704/10000 [00:08<00:01, 1192.23it/s, NLL=2.29][A
 88%|██████████████████████▉   | 8832/10000 [00:08<00:00, 1197.30it/s, NLL=2.29][A
 88%|██████████████████████▉   | 8832/10000 [00:08<00:00, 1197.30it/s, NLL=2.29][A
 90%|███████████████████████▎  | 8960/10000 [00:08<00:00, 1158.20it/s, NLL=2.29][A
 90%|███████████████████████▎  | 8960/10000 [00:08<00:00, 1158.20it/s, NLL=2.29][A
 91%|███████████████████████▋  | 9088/10000 [00:08<00:00, 1182.21it/s, NLL=2

 49%|█████████████▏             | 4864/10000 [00:07<00:05, 861.09it/s, NLL=2.26][A
 50%|█████████████▍             | 4992/10000 [00:08<00:05, 941.31it/s, NLL=2.26][A
 50%|█████████████▍             | 4992/10000 [00:08<00:05, 941.31it/s, NLL=2.27][A
 51%|█████████████▊             | 5120/10000 [00:08<00:04, 985.46it/s, NLL=2.27][A
 51%|█████████████▊             | 5120/10000 [00:08<00:04, 985.46it/s, NLL=2.26][A
 52%|██████████████▏            | 5248/10000 [00:08<00:07, 663.81it/s, NLL=2.26][A
 52%|██████████████▏            | 5248/10000 [00:08<00:07, 663.81it/s, NLL=2.26][A
 54%|██████████████▌            | 5376/10000 [00:08<00:06, 765.33it/s, NLL=2.26][A
 54%|██████████████▌            | 5376/10000 [00:08<00:06, 765.33it/s, NLL=2.25][A
 55%|██████████████▊            | 5504/10000 [00:08<00:05, 765.33it/s, NLL=2.25][A
 56%|███████████████▏           | 5632/10000 [00:08<00:04, 873.93it/s, NLL=2.25][A
 56%|███████████████▏           | 5632/10000 [00:08<00:04, 873.93it/s, NLL=2

 17%|████▍                      | 1664/10000 [00:03<00:10, 766.23it/s, NLL=2.21][A
 18%|████▊                      | 1792/10000 [00:03<00:12, 682.61it/s, NLL=2.21][A
 18%|████▊                      | 1792/10000 [00:03<00:12, 682.61it/s, NLL=2.22][A
 19%|█████▏                     | 1920/10000 [00:03<00:12, 621.94it/s, NLL=2.22][A
 19%|█████▏                     | 1920/10000 [00:03<00:12, 621.94it/s, NLL=2.23][A
 20%|█████▌                     | 2048/10000 [00:04<00:12, 621.94it/s, NLL=2.24][A
 22%|█████▉                     | 2176/10000 [00:04<00:12, 619.46it/s, NLL=2.24][A
 22%|█████▉                     | 2176/10000 [00:04<00:12, 619.46it/s, NLL=2.23][A
 23%|██████▏                    | 2304/10000 [00:04<00:10, 702.40it/s, NLL=2.23][A
 23%|██████▏                    | 2304/10000 [00:04<00:10, 702.40it/s, NLL=2.24][A
 24%|██████▌                    | 2432/10000 [00:04<00:11, 674.75it/s, NLL=2.24][A
 24%|██████▌                    | 2432/10000 [00:04<00:11, 674.75it/s, NLL=2

 87%|██████████████████████▋   | 8704/10000 [00:12<00:01, 1008.55it/s, NLL=2.27][A
 88%|██████████████████████▉   | 8832/10000 [00:12<00:01, 1016.98it/s, NLL=2.27][A
 88%|██████████████████████▉   | 8832/10000 [00:12<00:01, 1016.98it/s, NLL=2.27][A
 90%|███████████████████████▎  | 8960/10000 [00:12<00:01, 1016.98it/s, NLL=2.27][A
 91%|███████████████████████▋  | 9088/10000 [00:12<00:00, 1110.92it/s, NLL=2.27][A
 91%|███████████████████████▋  | 9088/10000 [00:12<00:00, 1110.92it/s, NLL=2.27][A
 92%|███████████████████████▉  | 9216/10000 [00:12<00:00, 1110.92it/s, NLL=2.27][A
 93%|████████████████████████▎ | 9344/10000 [00:13<00:00, 1039.88it/s, NLL=2.27][A
 93%|████████████████████████▎ | 9344/10000 [00:13<00:00, 1039.88it/s, NLL=2.27][A
 95%|████████████████████████▋ | 9472/10000 [00:13<00:00, 1039.88it/s, NLL=2.27][A
 96%|████████████████████████▉ | 9600/10000 [00:13<00:00, 1115.91it/s, NLL=2.27][A
 96%|████████████████████████▉ | 9600/10000 [00:13<00:00, 1115.91it/s, NLL=2

 58%|███████████████▌           | 5760/10000 [00:19<00:11, 354.72it/s, NLL=2.21][A
 59%|███████████████▉           | 5888/10000 [00:19<00:11, 363.84it/s, NLL=2.21][A
 59%|████████████████▍           | 5888/10000 [00:19<00:11, 363.84it/s, NLL=2.2][A
 60%|████████████████▊           | 6016/10000 [00:19<00:10, 363.84it/s, NLL=2.2][A
 61%|█████████████████▏          | 6144/10000 [00:20<00:07, 490.10it/s, NLL=2.2][A
 61%|█████████████████▏          | 6144/10000 [00:20<00:07, 490.10it/s, NLL=2.2][A
 63%|█████████████████▌          | 6272/10000 [00:20<00:09, 374.11it/s, NLL=2.2][A
 63%|█████████████████▌          | 6272/10000 [00:20<00:09, 374.11it/s, NLL=2.2][A
 64%|█████████████████▉          | 6400/10000 [00:20<00:09, 387.87it/s, NLL=2.2][A
 64%|█████████████████▉          | 6400/10000 [00:20<00:09, 387.87it/s, NLL=2.2][A
 65%|█████████████████▋         | 6528/10000 [00:21<00:08, 387.87it/s, NLL=2.19][A
 67%|█████████████████▉         | 6656/10000 [00:21<00:06, 544.64it/s, NLL=2

 27%|███████▌                    | 2688/10000 [00:12<00:21, 332.62it/s, NLL=2.2][A
 28%|███████▌                   | 2816/10000 [00:12<00:21, 332.62it/s, NLL=2.21][A
 29%|███████▉                   | 2944/10000 [00:12<00:15, 443.71it/s, NLL=2.21][A
 29%|███████▉                   | 2944/10000 [00:12<00:15, 443.71it/s, NLL=2.21][A
 31%|████████▎                  | 3072/10000 [00:12<00:15, 443.71it/s, NLL=2.21][A
 32%|████████▋                  | 3200/10000 [00:13<00:20, 336.16it/s, NLL=2.21][A
 32%|████████▋                  | 3200/10000 [00:13<00:20, 336.16it/s, NLL=2.21][A
 33%|████████▉                  | 3328/10000 [00:13<00:19, 336.16it/s, NLL=2.21][A
 35%|█████████▎                 | 3456/10000 [00:13<00:14, 440.39it/s, NLL=2.21][A
 35%|█████████▋                  | 3456/10000 [00:13<00:14, 440.39it/s, NLL=2.2][A
 36%|██████████                  | 3584/10000 [00:13<00:14, 440.39it/s, NLL=2.2][A
 37%|██████████▍                 | 3712/10000 [00:14<00:16, 370.69it/s, NLL=

 16%|█▊          | 450432/2885727 [55:28<3:50:51, 175.81it/s, NLL=2.37, epoch=1]
evaluating model...

  0%|                                                 | 0/10000 [00:00<?, ?it/s][A
  1%|▍                                      | 128/10000 [00:03<03:54, 42.19it/s][A
  1%|▎                            | 128/10000 [00:03<03:54, 42.19it/s, NLL=2.28][A
  3%|▋                            | 256/10000 [00:03<01:40, 97.14it/s, NLL=2.28][A
  3%|▋                            | 256/10000 [00:03<01:40, 97.14it/s, NLL=2.21][A
  4%|█                            | 384/10000 [00:03<01:38, 97.14it/s, NLL=2.27][A
  5%|█▍                          | 512/10000 [00:03<00:40, 232.11it/s, NLL=2.27][A
  5%|█▍                          | 512/10000 [00:03<00:40, 232.11it/s, NLL=2.22][A
  6%|█▊                          | 640/10000 [00:05<01:12, 129.21it/s, NLL=2.22][A
  6%|█▊                          | 640/10000 [00:05<01:12, 129.21it/s, NLL=2.21][A
  8%|██▏                         | 768/10000 [00:05<00:59,

 70%|███████████████████        | 7040/10000 [00:21<00:08, 363.54it/s, NLL=2.17][A
 72%|███████████████████▎       | 7168/10000 [00:22<00:05, 519.09it/s, NLL=2.17][A
 72%|███████████████████▎       | 7168/10000 [00:22<00:05, 519.09it/s, NLL=2.17][A
 73%|███████████████████▋       | 7296/10000 [00:22<00:06, 408.86it/s, NLL=2.17][A
 73%|███████████████████▋       | 7296/10000 [00:22<00:06, 408.86it/s, NLL=2.17][A
 74%|████████████████████       | 7424/10000 [00:22<00:06, 387.72it/s, NLL=2.17][A
 74%|████████████████████       | 7424/10000 [00:22<00:06, 387.72it/s, NLL=2.17][A
 76%|████████████████████▍      | 7552/10000 [00:23<00:06, 387.72it/s, NLL=2.17][A
 77%|████████████████████▋      | 7680/10000 [00:23<00:04, 545.77it/s, NLL=2.17][A
 77%|████████████████████▋      | 7680/10000 [00:23<00:04, 545.77it/s, NLL=2.16][A
 78%|█████████████████████      | 7808/10000 [00:23<00:05, 389.65it/s, NLL=2.16][A
 78%|█████████████████████      | 7808/10000 [00:23<00:05, 389.65it/s, NLL=2

 38%|██████████▎                | 3840/10000 [00:13<00:15, 391.26it/s, NLL=2.12][A
 40%|██████████▋                | 3968/10000 [00:13<00:15, 391.26it/s, NLL=2.12][A
 41%|███████████                | 4096/10000 [00:13<00:10, 550.33it/s, NLL=2.12][A
 41%|███████████                | 4096/10000 [00:13<00:10, 550.33it/s, NLL=2.12][A
 42%|███████████▍               | 4224/10000 [00:14<00:13, 419.29it/s, NLL=2.12][A
 42%|███████████▍               | 4224/10000 [00:14<00:13, 419.29it/s, NLL=2.12][A
 44%|███████████▊               | 4352/10000 [00:14<00:13, 434.07it/s, NLL=2.12][A
 44%|███████████▊               | 4352/10000 [00:14<00:13, 434.07it/s, NLL=2.11][A
 45%|████████████               | 4480/10000 [00:14<00:12, 434.07it/s, NLL=2.11][A
 46%|████████████▍              | 4608/10000 [00:14<00:09, 598.88it/s, NLL=2.11][A
 46%|████████████▍              | 4608/10000 [00:14<00:09, 598.88it/s, NLL=2.11][A
 47%|████████████▊              | 4736/10000 [00:15<00:12, 435.61it/s, NLL=2

  6%|█▊                          | 640/10000 [00:05<01:10, 133.37it/s, NLL=2.21][A
  8%|██▏                         | 768/10000 [00:05<01:09, 133.37it/s, NLL=2.19][A
  9%|██▌                         | 896/10000 [00:05<00:42, 215.28it/s, NLL=2.19][A
  9%|██▌                         | 896/10000 [00:05<00:42, 215.28it/s, NLL=2.16][A
 10%|██▊                        | 1024/10000 [00:05<00:33, 266.74it/s, NLL=2.16][A
 10%|██▊                        | 1024/10000 [00:05<00:33, 266.74it/s, NLL=2.17][A
 12%|███                        | 1152/10000 [00:07<00:50, 176.38it/s, NLL=2.17][A
 12%|███                        | 1152/10000 [00:07<00:50, 176.38it/s, NLL=2.14][A
 13%|███▍                       | 1280/10000 [00:07<00:38, 227.82it/s, NLL=2.14][A
 13%|███▍                       | 1280/10000 [00:07<00:38, 227.82it/s, NLL=2.13][A
 14%|███▊                       | 1408/10000 [00:07<00:37, 227.82it/s, NLL=2.13][A
 15%|████▏                      | 1536/10000 [00:07<00:23, 352.92it/s, NLL=2

 78%|█████████████████████      | 7808/10000 [00:23<00:04, 517.98it/s, NLL=2.14][A
 79%|█████████████████████▍     | 7936/10000 [00:24<00:06, 343.47it/s, NLL=2.14][A
 79%|█████████████████████▍     | 7936/10000 [00:24<00:06, 343.47it/s, NLL=2.14][A
 81%|█████████████████████▊     | 8064/10000 [00:24<00:04, 422.65it/s, NLL=2.14][A
 81%|█████████████████████▊     | 8064/10000 [00:24<00:04, 422.65it/s, NLL=2.13][A
 82%|██████████████████████     | 8192/10000 [00:24<00:04, 422.65it/s, NLL=2.13][A
 83%|██████████████████████▍    | 8320/10000 [00:24<00:02, 588.58it/s, NLL=2.13][A
 83%|██████████████████████▍    | 8320/10000 [00:24<00:02, 588.58it/s, NLL=2.13][A
 84%|██████████████████████▊    | 8448/10000 [00:25<00:04, 367.37it/s, NLL=2.13][A
 84%|██████████████████████▊    | 8448/10000 [00:25<00:04, 367.37it/s, NLL=2.13][A
 86%|███████████████████████▏   | 8576/10000 [00:25<00:03, 367.37it/s, NLL=2.13][A
 87%|███████████████████████▌   | 8704/10000 [00:25<00:02, 518.78it/s, NLL=2

In [None]:
# !python train.py convFull