# How Powerful are Graph Neural Networks?

The code is adapted from the [author's codebase](https://github.com/weihua916/powerful-gnns) by Mingyu Derek Ma

## Installation

Download `dataset.zip` from author's Github repo, unzip it and locate it at the same directory as this notebook. Install the following packages if not yet do so.

In [1]:
!pip install torch torchvision torchaudio tqdm numpy networkx sklearn

Collecting torchaudio
[?25l  Downloading https://files.pythonhosted.org/packages/37/16/ecdb9eb09ec6b8133d6c9536ea9e49cd13c9b5873c8488b8b765a39028da/torchaudio-0.7.2-cp37-cp37m-manylinux1_x86_64.whl (7.6MB)
[K     |████████████████████████████████| 7.6MB 4.8MB/s eta 0:00:01     |██████████▌                     | 2.5MB 4.8MB/s eta 0:00:02     |██████████████▌                 | 3.5MB 4.8MB/s eta 0:00:01     |█████████████████████████▋      | 6.1MB 4.8MB/s eta 0:00:01
Collecting sklearn
  Using cached https://files.pythonhosted.org/packages/1e/7a/dbb3be0ce9bd5c8b7e3d87328e79063f8b263b2b1bfa4774cb1147bfcd3f/sklearn-0.0.tar.gz
Building wheels for collected packages: sklearn
  Building wheel for sklearn (setup.py) ... [?25ldone
[?25h  Created wheel for sklearn: filename=sklearn-0.0-py2.py3-none-any.whl size=1316 sha256=7649219d00f6ed6375ef53781cd055a5f6ff037b2e821e6119650c9c4c65d14c
  Stored in directory: /nas/home/mingyuma/.cache/pip/wheels/76/03/bb/589d421d27431bcd2c6da284d5f2286c8e3b2e

In [2]:
import networkx as nx
import numpy as np
import random
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import StratifiedKFold
import numpy as np
from tqdm import tqdm
import sys
sys.path.append("models/")

## Models
Define classes for MLP and GraphCNN

In [3]:
###MLP with lienar output
class MLP(nn.Module):
    def __init__(self, num_layers, input_dim, hidden_dim, output_dim):
        '''
            num_layers: number of layers in the neural networks (EXCLUDING the input layer). If num_layers=1, this reduces to linear model.
            input_dim: dimensionality of input features
            hidden_dim: dimensionality of hidden units at ALL layers
            output_dim: number of classes for prediction
            device: which device to use
        '''
    
        super(MLP, self).__init__()

        self.linear_or_not = True #default is linear model
        self.num_layers = num_layers

        if num_layers < 1:
            raise ValueError("number of layers should be positive!")
        elif num_layers == 1:
            #Linear model
            self.linear = nn.Linear(input_dim, output_dim)
        else:
            #Multi-layer model
            self.linear_or_not = False
            self.linears = torch.nn.ModuleList()
            self.batch_norms = torch.nn.ModuleList()
        
            self.linears.append(nn.Linear(input_dim, hidden_dim))
            for layer in range(num_layers - 2):
                self.linears.append(nn.Linear(hidden_dim, hidden_dim))
            self.linears.append(nn.Linear(hidden_dim, output_dim))

            for layer in range(num_layers - 1):
                self.batch_norms.append(nn.BatchNorm1d((hidden_dim)))

    def forward(self, x):
        if self.linear_or_not:
            #If linear model
            return self.linear(x)
        else:
            #If MLP
            h = x
            for layer in range(self.num_layers - 1):
                h = F.relu(self.batch_norms[layer](self.linears[layer](h)))
            return self.linears[self.num_layers - 1](h)

In [4]:
class GraphCNN(nn.Module):
    def __init__(self, num_layers, num_mlp_layers, input_dim, hidden_dim, output_dim, final_dropout, learn_eps, graph_pooling_type, neighbor_pooling_type, device):
        '''
            num_layers: number of layers in the neural networks (INCLUDING the input layer)
            num_mlp_layers: number of layers in mlps (EXCLUDING the input layer)
            input_dim: dimensionality of input features
            hidden_dim: dimensionality of hidden units at ALL layers
            output_dim: number of classes for prediction
            final_dropout: dropout ratio on the final linear layer
            learn_eps: If True, learn epsilon to distinguish center nodes from neighboring nodes. If False, aggregate neighbors and center nodes altogether. 
            neighbor_pooling_type: how to aggregate neighbors (mean, average, or max)
            graph_pooling_type: how to aggregate entire nodes in a graph (mean, average)
            device: which device to use
        '''

        super(GraphCNN, self).__init__()

        self.final_dropout = final_dropout
        self.device = device
        self.num_layers = num_layers
        self.graph_pooling_type = graph_pooling_type
        self.neighbor_pooling_type = neighbor_pooling_type
        self.learn_eps = learn_eps
        self.eps = nn.Parameter(torch.zeros(self.num_layers-1))

        ###List of MLPs
        self.mlps = torch.nn.ModuleList()

        ###List of batchnorms applied to the output of MLP (input of the final prediction linear layer)
        self.batch_norms = torch.nn.ModuleList()

        for layer in range(self.num_layers-1):
            if layer == 0:
                self.mlps.append(MLP(num_mlp_layers, input_dim, hidden_dim, hidden_dim))
            else:
                self.mlps.append(MLP(num_mlp_layers, hidden_dim, hidden_dim, hidden_dim))

            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

        #Linear function that maps the hidden representation at dofferemt layers into a prediction score
        self.linears_prediction = torch.nn.ModuleList()
        for layer in range(num_layers):
            if layer == 0:
                self.linears_prediction.append(nn.Linear(input_dim, output_dim))
            else:
                self.linears_prediction.append(nn.Linear(hidden_dim, output_dim))


    def __preprocess_neighbors_maxpool(self, batch_graph):
        ###create padded_neighbor_list in concatenated graph

        #compute the maximum number of neighbors within the graphs in the current minibatch
        max_deg = max([graph.max_neighbor for graph in batch_graph])

        padded_neighbor_list = []
        start_idx = [0]


        for i, graph in enumerate(batch_graph):
            start_idx.append(start_idx[i] + len(graph.g))
            padded_neighbors = []
            for j in range(len(graph.neighbors)):
                #add off-set values to the neighbor indices
                pad = [n + start_idx[i] for n in graph.neighbors[j]]
                #padding, dummy data is assumed to be stored in -1
                pad.extend([-1]*(max_deg - len(pad)))

                #Add center nodes in the maxpooling if learn_eps is False, i.e., aggregate center nodes and neighbor nodes altogether.
                if not self.learn_eps:
                    pad.append(j + start_idx[i])

                padded_neighbors.append(pad)
            padded_neighbor_list.extend(padded_neighbors)

        return torch.LongTensor(padded_neighbor_list)


    def __preprocess_neighbors_sumavepool(self, batch_graph):
        ###create block diagonal sparse matrix

        edge_mat_list = []
        start_idx = [0]
        for i, graph in enumerate(batch_graph):
            start_idx.append(start_idx[i] + len(graph.g))
            edge_mat_list.append(graph.edge_mat + start_idx[i])
        Adj_block_idx = torch.cat(edge_mat_list, 1)
        Adj_block_elem = torch.ones(Adj_block_idx.shape[1])

        #Add self-loops in the adjacency matrix if learn_eps is False, i.e., aggregate center nodes and neighbor nodes altogether.

        if not self.learn_eps:
            num_node = start_idx[-1]
            self_loop_edge = torch.LongTensor([range(num_node), range(num_node)])
            elem = torch.ones(num_node)
            Adj_block_idx = torch.cat([Adj_block_idx, self_loop_edge], 1)
            Adj_block_elem = torch.cat([Adj_block_elem, elem], 0)

        Adj_block = torch.sparse.FloatTensor(Adj_block_idx, Adj_block_elem, torch.Size([start_idx[-1],start_idx[-1]]))

        return Adj_block.to(self.device)


    def __preprocess_graphpool(self, batch_graph):
        ###create sum or average pooling sparse matrix over entire nodes in each graph (num graphs x num nodes)
        
        start_idx = [0]

        #compute the padded neighbor list
        for i, graph in enumerate(batch_graph):
            start_idx.append(start_idx[i] + len(graph.g))

        idx = []
        elem = []
        for i, graph in enumerate(batch_graph):
            ###average pooling
            if self.graph_pooling_type == "average":
                elem.extend([1./len(graph.g)]*len(graph.g))
            
            else:
            ###sum pooling
                elem.extend([1]*len(graph.g))

            idx.extend([[i, j] for j in range(start_idx[i], start_idx[i+1], 1)])
        elem = torch.FloatTensor(elem)
        idx = torch.LongTensor(idx).transpose(0,1)
        graph_pool = torch.sparse.FloatTensor(idx, elem, torch.Size([len(batch_graph), start_idx[-1]]))
        
        return graph_pool.to(self.device)

    def maxpool(self, h, padded_neighbor_list):
        ###Element-wise minimum will never affect max-pooling

        dummy = torch.min(h, dim = 0)[0]
        h_with_dummy = torch.cat([h, dummy.reshape((1, -1)).to(self.device)])
        pooled_rep = torch.max(h_with_dummy[padded_neighbor_list], dim = 1)[0]
        return pooled_rep


    def next_layer_eps(self, h, layer, padded_neighbor_list = None, Adj_block = None):
        ###pooling neighboring nodes and center nodes separately by epsilon reweighting. 

        if self.neighbor_pooling_type == "max":
            ##If max pooling
            pooled = self.maxpool(h, padded_neighbor_list)
        else:
            #If sum or average pooling
            pooled = torch.spmm(Adj_block, h)
            if self.neighbor_pooling_type == "average":
                #If average pooling
                degree = torch.spmm(Adj_block, torch.ones((Adj_block.shape[0], 1)).to(self.device))
                pooled = pooled/degree

        #Reweights the center node representation when aggregating it with its neighbors
        pooled = pooled + (1 + self.eps[layer])*h
        pooled_rep = self.mlps[layer](pooled)
        h = self.batch_norms[layer](pooled_rep)

        #non-linearity
        h = F.relu(h)
        return h


    def next_layer(self, h, layer, padded_neighbor_list = None, Adj_block = None):
        ###pooling neighboring nodes and center nodes altogether  
            
        if self.neighbor_pooling_type == "max":
            ##If max pooling
            pooled = self.maxpool(h, padded_neighbor_list)
        else:
            #If sum or average pooling
            pooled = torch.spmm(Adj_block, h)
            if self.neighbor_pooling_type == "average":
                #If average pooling
                degree = torch.spmm(Adj_block, torch.ones((Adj_block.shape[0], 1)).to(self.device))
                pooled = pooled/degree

        #representation of neighboring and center nodes 
        pooled_rep = self.mlps[layer](pooled)

        h = self.batch_norms[layer](pooled_rep)

        #non-linearity
        h = F.relu(h)
        return h


    def forward(self, batch_graph):
        X_concat = torch.cat([graph.node_features for graph in batch_graph], 0).to(self.device)
        graph_pool = self.__preprocess_graphpool(batch_graph)

        if self.neighbor_pooling_type == "max":
            padded_neighbor_list = self.__preprocess_neighbors_maxpool(batch_graph)
        else:
            Adj_block = self.__preprocess_neighbors_sumavepool(batch_graph)

        #list of hidden representation at each layer (including input)
        hidden_rep = [X_concat]
        h = X_concat

        for layer in range(self.num_layers-1):
            if self.neighbor_pooling_type == "max" and self.learn_eps:
                h = self.next_layer_eps(h, layer, padded_neighbor_list = padded_neighbor_list)
            elif not self.neighbor_pooling_type == "max" and self.learn_eps:
                h = self.next_layer_eps(h, layer, Adj_block = Adj_block)
            elif self.neighbor_pooling_type == "max" and not self.learn_eps:
                h = self.next_layer(h, layer, padded_neighbor_list = padded_neighbor_list)
            elif not self.neighbor_pooling_type == "max" and not self.learn_eps:
                h = self.next_layer(h, layer, Adj_block = Adj_block)

            hidden_rep.append(h)

        score_over_layer = 0
    
        #perform pooling over all nodes in each graph in every layer
        for layer, h in enumerate(hidden_rep):
            pooled_h = torch.spmm(graph_pool, h)
            score_over_layer += F.dropout(self.linears_prediction[layer](pooled_h), self.final_dropout, training = self.training)

        return score_over_layer

## Data Loader

In [5]:
###pass data to model with minibatch during testing to avoid memory overflow (does not perform backpropagation)
def pass_data_iteratively(model, graphs, minibatch_size = 64):
    model.eval()
    output = []
    idx = np.arange(len(graphs))
    for i in range(0, len(graphs), minibatch_size):
        sampled_idx = idx[i:i+minibatch_size]
        if len(sampled_idx) == 0:
            continue
        output.append(model([graphs[j] for j in sampled_idx]).detach())
    return torch.cat(output, 0)

## Utility Functions

In [6]:
class S2VGraph(object):
    def __init__(self, g, label, node_tags=None, node_features=None):
        '''
            g: a networkx graph
            label: an integer graph label
            node_tags: a list of integer node tags
            node_features: a torch float tensor, one-hot representation of the tag that is used as input to neural nets
            edge_mat: a torch long tensor, contain edge list, will be used to create torch sparse tensor
            neighbors: list of neighbors (without self-loop)
        '''
        self.label = label
        self.g = g
        self.node_tags = node_tags
        self.neighbors = []
        self.node_features = 0
        self.edge_mat = 0

        self.max_neighbor = 0


def load_data(dataset, degree_as_tag):
    '''
        dataset: name of dataset
        test_proportion: ratio of test train split
        seed: random seed for random splitting of dataset
    '''

    print('loading data')
    g_list = []
    label_dict = {}
    feat_dict = {}

    with open('dataset/%s/%s.txt' % (dataset, dataset), 'r') as f:
        n_g = int(f.readline().strip())
        for i in range(n_g):
            row = f.readline().strip().split()
            n, l = [int(w) for w in row]
            if not l in label_dict:
                mapped = len(label_dict)
                label_dict[l] = mapped
            g = nx.Graph()
            node_tags = []
            node_features = []
            n_edges = 0
            for j in range(n):
                g.add_node(j)
                row = f.readline().strip().split()
                tmp = int(row[1]) + 2
                if tmp == len(row):
                    # no node attributes
                    row = [int(w) for w in row]
                    attr = None
                else:
                    row, attr = [int(w) for w in row[:tmp]], np.array([float(w) for w in row[tmp:]])
                if not row[0] in feat_dict:
                    mapped = len(feat_dict)
                    feat_dict[row[0]] = mapped
                node_tags.append(feat_dict[row[0]])

                if tmp > len(row):
                    node_features.append(attr)

                n_edges += row[1]
                for k in range(2, len(row)):
                    g.add_edge(j, row[k])

            if node_features != []:
                node_features = np.stack(node_features)
                node_feature_flag = True
            else:
                node_features = None
                node_feature_flag = False

            assert len(g) == n

            g_list.append(S2VGraph(g, l, node_tags))

    #add labels and edge_mat       
    for g in g_list:
        g.neighbors = [[] for i in range(len(g.g))]
        for i, j in g.g.edges():
            g.neighbors[i].append(j)
            g.neighbors[j].append(i)
        degree_list = []
        for i in range(len(g.g)):
            g.neighbors[i] = g.neighbors[i]
            degree_list.append(len(g.neighbors[i]))
        g.max_neighbor = max(degree_list)

        g.label = label_dict[g.label]

        edges = [list(pair) for pair in g.g.edges()]
        edges.extend([[i, j] for j, i in edges])

        deg_list = list(dict(g.g.degree(range(len(g.g)))).values())
        g.edge_mat = torch.LongTensor(edges).transpose(0,1)

    if degree_as_tag:
        for g in g_list:
            g.node_tags = list(dict(g.g.degree).values())

    #Extracting unique tag labels   
    tagset = set([])
    for g in g_list:
        tagset = tagset.union(set(g.node_tags))

    tagset = list(tagset)
    tag2index = {tagset[i]:i for i in range(len(tagset))}

    for g in g_list:
        g.node_features = torch.zeros(len(g.node_tags), len(tagset))
        g.node_features[range(len(g.node_tags)), [tag2index[tag] for tag in g.node_tags]] = 1


    print('# classes: %d' % len(label_dict))
    print('# maximum node tag: %d' % len(tagset))

    print("# data: %d" % len(g_list))

    return g_list, len(label_dict)

def separate_data(graph_list, seed, fold_idx):
    assert 0 <= fold_idx and fold_idx < 10, "fold_idx must be from 0 to 9."
    skf = StratifiedKFold(n_splits=10, shuffle = True, random_state = seed)

    labels = [graph.label for graph in graph_list]
    idx_list = []
    for idx in skf.split(np.zeros(len(labels)), labels):
        idx_list.append(idx)
    train_idx, test_idx = idx_list[fold_idx]

    train_graph_list = [graph_list[i] for i in train_idx]
    test_graph_list = [graph_list[i] for i in test_idx]

    return train_graph_list, test_graph_list

## Train a Epoch

In [7]:
criterion = nn.CrossEntropyLoss()

def train(args, model, device, train_graphs, optimizer, epoch):
    model.train()

    total_iters = args.iters_per_epoch
    pbar = tqdm(range(total_iters), unit='batch')

    loss_accum = 0
    for pos in pbar:
        selected_idx = np.random.permutation(len(train_graphs))[:args.batch_size]

        batch_graph = [train_graphs[idx] for idx in selected_idx]
        output = model(batch_graph)

        labels = torch.LongTensor([graph.label for graph in batch_graph]).to(device)

        #compute loss
        loss = criterion(output, labels)

        #backprop
        if optimizer is not None:
            optimizer.zero_grad()
            loss.backward()         
            optimizer.step()
        

        loss = loss.detach().cpu().numpy()
        loss_accum += loss

        #report
        pbar.set_description('epoch: %d' % (epoch))

    average_loss = loss_accum/total_iters
    print("loss training: %f" % (average_loss))
    
    return average_loss

## Test a Epoch

In [8]:
def test(args, model, device, train_graphs, test_graphs, epoch):
    model.eval()

    output = pass_data_iteratively(model, train_graphs)
    pred = output.max(1, keepdim=True)[1]
    labels = torch.LongTensor([graph.label for graph in train_graphs]).to(device)
    correct = pred.eq(labels.view_as(pred)).sum().cpu().item()
    acc_train = correct / float(len(train_graphs))

    output = pass_data_iteratively(model, test_graphs)
    pred = output.max(1, keepdim=True)[1]
    labels = torch.LongTensor([graph.label for graph in test_graphs]).to(device)
    correct = pred.eq(labels.view_as(pred)).sum().cpu().item()
    acc_test = correct / float(len(test_graphs))

    print("accuracy train: %f test: %f" % (acc_train, acc_test))

    return acc_train, acc_test

## Main Training and Testing Logic

### Initialize arguments, data, model and optimizer
The default argument configuration is for GIN

In [9]:
# Training settings
# Note: Hyper-parameters need to be tuned in order to obtain results reported in the paper.
parser = argparse.ArgumentParser(description='PyTorch graph convolutional neural net for whole-graph classification')
parser.add_argument('--dataset', type=str, default="MUTAG",
                    help='name of dataset (default: MUTAG)')
parser.add_argument('--device', type=int, default=0,
                    help='which gpu to use if any (default: 0)')
parser.add_argument('--batch_size', type=int, default=32,
                    help='input batch size for training (default: 32)')
parser.add_argument('--iters_per_epoch', type=int, default=50,
                    help='number of iterations per each epoch (default: 50)')
parser.add_argument('--epochs', type=int, default=350,
                    help='number of epochs to train (default: 350)')
parser.add_argument('--lr', type=float, default=0.01,
                    help='learning rate (default: 0.01)')
parser.add_argument('--seed', type=int, default=0,
                    help='random seed for splitting the dataset into 10 (default: 0)')
parser.add_argument('--fold_idx', type=int, default=0,
                    help='the index of fold in 10-fold validation. Should be less then 10.')
parser.add_argument('--num_layers', type=int, default=5,
                    help='number of layers INCLUDING the input one (default: 5)')
parser.add_argument('--num_mlp_layers', type=int, default=2,
                    help='number of layers for MLP EXCLUDING the input one (default: 2). 1 means linear model.')
parser.add_argument('--hidden_dim', type=int, default=64,
                    help='number of hidden units (default: 64)')
parser.add_argument('--final_dropout', type=float, default=0.5,
                    help='final layer dropout (default: 0.5)')
parser.add_argument('--graph_pooling_type', type=str, default="sum", choices=["sum", "average"],
                    help='Pooling for over nodes in a graph: sum or average')
parser.add_argument('--neighbor_pooling_type', type=str, default="sum", choices=["sum", "average", "max"],
                    help='Pooling for over neighboring nodes: sum, average or max')
parser.add_argument('--learn_eps', action="store_true",
                                    help='Whether to learn the epsilon weighting for the center nodes. Does not affect training accuracy though.')
parser.add_argument('--degree_as_tag', action="store_true",
                    help='let the input node features be the degree of nodes (heuristics for unlabeled graph)')
parser.add_argument('--filename', type = str, default = "",
                                    help='output file')
args = parser.parse_args(args=[])

print(args)

#set up seeds and gpu device
torch.manual_seed(0)
np.random.seed(0)    
device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(0)

graphs, num_classes = load_data(args.dataset, args.degree_as_tag)

##10-fold cross validation. Conduct an experiment on the fold specified by args.fold_idx.
train_graphs, test_graphs = separate_data(graphs, args.seed, args.fold_idx)

model = GraphCNN(args.num_layers, args.num_mlp_layers, train_graphs[0].node_features.shape[1], args.hidden_dim, num_classes, args.final_dropout, args.learn_eps, args.graph_pooling_type, args.neighbor_pooling_type, device).to(device)

optimizer = optim.Adam(model.parameters(), lr=args.lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

Namespace(batch_size=32, dataset='MUTAG', degree_as_tag=False, device=0, epochs=350, filename='', final_dropout=0.5, fold_idx=0, graph_pooling_type='sum', hidden_dim=64, iters_per_epoch=50, learn_eps=False, lr=0.01, neighbor_pooling_type='sum', num_layers=5, num_mlp_layers=2, seed=0)
loading data
# classes: 2
# maximum node tag: 7
# data: 188


### Training

In [10]:
for epoch in range(1, args.epochs + 1):
    scheduler.step()

    avg_loss = train(args, model, device, train_graphs, optimizer, epoch)
    acc_train, acc_test = test(args, model, device, train_graphs, test_graphs, epoch)

    if not args.filename == "":
        with open(args.filename, 'w') as f:
            f.write("%f %f %f" % (avg_loss, acc_train, acc_test))
            f.write("\n")
    print("")

    print(model.eps)

epoch: 1: 100%|██████████| 50/50 [00:02<00:00, 20.76batch/s]
epoch: 2:   8%|▊         | 4/50 [00:00<00:01, 39.89batch/s]

loss training: 2.347738
accuracy train: 0.702381 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 2: 100%|██████████| 50/50 [00:01<00:00, 38.83batch/s]
epoch: 3:  10%|█         | 5/50 [00:00<00:01, 40.58batch/s]

loss training: 0.933668
accuracy train: 0.875000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 3: 100%|██████████| 50/50 [00:01<00:00, 36.59batch/s]
epoch: 4:   8%|▊         | 4/50 [00:00<00:01, 38.84batch/s]

loss training: 0.688289
accuracy train: 0.880952 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 4: 100%|██████████| 50/50 [00:01<00:00, 34.30batch/s]
epoch: 5:  10%|█         | 5/50 [00:00<00:01, 39.96batch/s]

loss training: 0.787647
accuracy train: 0.863095 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 5: 100%|██████████| 50/50 [00:01<00:00, 38.87batch/s]
epoch: 6:  10%|█         | 5/50 [00:00<00:01, 40.27batch/s]

loss training: 0.406840
accuracy train: 0.773810 test: 0.650000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 6: 100%|██████████| 50/50 [00:01<00:00, 37.48batch/s]
epoch: 7:   8%|▊         | 4/50 [00:00<00:01, 38.14batch/s]

loss training: 0.538956
accuracy train: 0.904762 test: 0.900000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 7: 100%|██████████| 50/50 [00:01<00:00, 37.86batch/s]
epoch: 8:   8%|▊         | 4/50 [00:00<00:01, 39.79batch/s]

loss training: 0.364966
accuracy train: 0.910714 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 8: 100%|██████████| 50/50 [00:01<00:00, 38.19batch/s]
epoch: 9:  10%|█         | 5/50 [00:00<00:01, 40.05batch/s]

loss training: 0.318661
accuracy train: 0.922619 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 9: 100%|██████████| 50/50 [00:01<00:00, 38.41batch/s]
epoch: 10:  10%|█         | 5/50 [00:00<00:01, 39.95batch/s]

loss training: 0.213436
accuracy train: 0.952381 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 10: 100%|██████████| 50/50 [00:01<00:00, 37.88batch/s]
epoch: 11:  10%|█         | 5/50 [00:00<00:01, 40.27batch/s]

loss training: 0.233007
accuracy train: 0.946429 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 11: 100%|██████████| 50/50 [00:01<00:00, 38.00batch/s]
epoch: 12:  10%|█         | 5/50 [00:00<00:01, 40.26batch/s]

loss training: 0.429802
accuracy train: 0.886905 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 12: 100%|██████████| 50/50 [00:01<00:00, 38.68batch/s]
epoch: 13:   6%|▌         | 3/50 [00:00<00:01, 29.89batch/s]

loss training: 0.355327
accuracy train: 0.910714 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 13: 100%|██████████| 50/50 [00:01<00:00, 32.36batch/s]
epoch: 14:   8%|▊         | 4/50 [00:00<00:01, 39.19batch/s]

loss training: 0.282664
accuracy train: 0.934524 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 14: 100%|██████████| 50/50 [00:01<00:00, 38.03batch/s]
epoch: 15:   8%|▊         | 4/50 [00:00<00:01, 39.85batch/s]

loss training: 0.353912
accuracy train: 0.940476 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 15: 100%|██████████| 50/50 [00:01<00:00, 37.83batch/s]
epoch: 16:  10%|█         | 5/50 [00:00<00:01, 40.69batch/s]

loss training: 0.234229
accuracy train: 0.910714 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 16: 100%|██████████| 50/50 [00:01<00:00, 37.71batch/s]
epoch: 17:   8%|▊         | 4/50 [00:00<00:01, 38.92batch/s]

loss training: 0.267967
accuracy train: 0.916667 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 17: 100%|██████████| 50/50 [00:01<00:00, 37.85batch/s]
epoch: 18:  10%|█         | 5/50 [00:00<00:01, 40.38batch/s]

loss training: 0.207644
accuracy train: 0.940476 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 18: 100%|██████████| 50/50 [00:01<00:00, 36.96batch/s]
epoch: 19:   8%|▊         | 4/50 [00:00<00:01, 38.78batch/s]

loss training: 0.225872
accuracy train: 0.934524 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 19: 100%|██████████| 50/50 [00:01<00:00, 36.79batch/s]
epoch: 20:   8%|▊         | 4/50 [00:00<00:01, 38.86batch/s]

loss training: 0.163871
accuracy train: 0.875000 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 20: 100%|██████████| 50/50 [00:01<00:00, 36.50batch/s]
epoch: 21:  10%|█         | 5/50 [00:00<00:01, 40.15batch/s]

loss training: 0.180911
accuracy train: 0.952381 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 21: 100%|██████████| 50/50 [00:01<00:00, 37.25batch/s]
epoch: 22:   8%|▊         | 4/50 [00:00<00:01, 37.09batch/s]

loss training: 0.143786
accuracy train: 0.952381 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 22: 100%|██████████| 50/50 [00:01<00:00, 37.91batch/s]
epoch: 23:   8%|▊         | 4/50 [00:00<00:01, 38.53batch/s]

loss training: 0.269119
accuracy train: 0.785714 test: 0.700000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 23: 100%|██████████| 50/50 [00:01<00:00, 38.27batch/s]
epoch: 24:  10%|█         | 5/50 [00:00<00:01, 40.60batch/s]

loss training: 0.193042
accuracy train: 0.958333 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 24: 100%|██████████| 50/50 [00:01<00:00, 38.73batch/s]
epoch: 25:   8%|▊         | 4/50 [00:00<00:01, 39.09batch/s]

loss training: 0.197566
accuracy train: 0.875000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 25: 100%|██████████| 50/50 [00:01<00:00, 38.61batch/s]
epoch: 26:   8%|▊         | 4/50 [00:00<00:01, 37.72batch/s]

loss training: 0.225117
accuracy train: 0.946429 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 26: 100%|██████████| 50/50 [00:01<00:00, 37.99batch/s]
epoch: 27:  10%|█         | 5/50 [00:00<00:01, 40.73batch/s]

loss training: 0.189388
accuracy train: 0.916667 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 27: 100%|██████████| 50/50 [00:01<00:00, 38.72batch/s]
epoch: 28:  10%|█         | 5/50 [00:00<00:01, 40.51batch/s]

loss training: 0.191499
accuracy train: 0.958333 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 28: 100%|██████████| 50/50 [00:01<00:00, 37.55batch/s]
epoch: 29:  10%|█         | 5/50 [00:00<00:01, 36.63batch/s]

loss training: 0.167618
accuracy train: 0.934524 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 29: 100%|██████████| 50/50 [00:01<00:00, 35.16batch/s]
epoch: 30:  10%|█         | 5/50 [00:00<00:01, 40.28batch/s]

loss training: 0.146007
accuracy train: 0.898810 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 30: 100%|██████████| 50/50 [00:01<00:00, 37.20batch/s]
epoch: 31:   8%|▊         | 4/50 [00:00<00:01, 39.74batch/s]

loss training: 0.160448
accuracy train: 0.946429 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 31: 100%|██████████| 50/50 [00:01<00:00, 34.00batch/s]
epoch: 32:  10%|█         | 5/50 [00:00<00:01, 40.03batch/s]

loss training: 0.139935
accuracy train: 0.952381 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 32: 100%|██████████| 50/50 [00:01<00:00, 37.27batch/s]
epoch: 33:  10%|█         | 5/50 [00:00<00:01, 40.23batch/s]

loss training: 0.167288
accuracy train: 0.964286 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 33: 100%|██████████| 50/50 [00:01<00:00, 38.36batch/s]
epoch: 34:   8%|▊         | 4/50 [00:00<00:01, 39.94batch/s]

loss training: 0.166547
accuracy train: 0.970238 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 34: 100%|██████████| 50/50 [00:01<00:00, 39.02batch/s]
epoch: 35:  10%|█         | 5/50 [00:00<00:01, 40.74batch/s]

loss training: 0.139135
accuracy train: 0.976190 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 35: 100%|██████████| 50/50 [00:01<00:00, 28.58batch/s]
epoch: 36:   8%|▊         | 4/50 [00:00<00:01, 39.96batch/s]

loss training: 0.109263
accuracy train: 0.952381 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 36: 100%|██████████| 50/50 [00:01<00:00, 37.20batch/s]
epoch: 37:  10%|█         | 5/50 [00:00<00:01, 40.57batch/s]

loss training: 0.160407
accuracy train: 0.375000 test: 0.350000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 37: 100%|██████████| 50/50 [00:01<00:00, 37.45batch/s]
epoch: 38:  10%|█         | 5/50 [00:00<00:01, 40.83batch/s]

loss training: 0.240975
accuracy train: 0.886905 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 38: 100%|██████████| 50/50 [00:01<00:00, 38.11batch/s]
epoch: 39:  10%|█         | 5/50 [00:00<00:01, 40.28batch/s]

loss training: 0.212611
accuracy train: 0.934524 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 39: 100%|██████████| 50/50 [00:01<00:00, 37.68batch/s]
epoch: 40:  10%|█         | 5/50 [00:00<00:01, 39.90batch/s]

loss training: 0.131429
accuracy train: 0.946429 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 40: 100%|██████████| 50/50 [00:01<00:00, 29.48batch/s]
epoch: 41:   0%|          | 0/50 [00:00<?, ?batch/s]

loss training: 0.134703
accuracy train: 0.952381 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 41: 100%|██████████| 50/50 [00:01<00:00, 32.85batch/s]
epoch: 42:   8%|▊         | 4/50 [00:00<00:01, 37.81batch/s]

loss training: 0.151356
accuracy train: 0.946429 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 42: 100%|██████████| 50/50 [00:01<00:00, 32.59batch/s]
epoch: 43:   8%|▊         | 4/50 [00:00<00:01, 39.05batch/s]

loss training: 0.104013
accuracy train: 0.988095 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 43: 100%|██████████| 50/50 [00:01<00:00, 36.57batch/s]
epoch: 44:   8%|▊         | 4/50 [00:00<00:01, 37.93batch/s]

loss training: 0.099727
accuracy train: 0.988095 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 44: 100%|██████████| 50/50 [00:01<00:00, 37.75batch/s]
epoch: 45:  10%|█         | 5/50 [00:00<00:01, 40.69batch/s]

loss training: 0.234846
accuracy train: 0.892857 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 45: 100%|██████████| 50/50 [00:01<00:00, 36.50batch/s]
epoch: 46:   8%|▊         | 4/50 [00:00<00:01, 38.94batch/s]

loss training: 0.185297
accuracy train: 0.958333 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 46: 100%|██████████| 50/50 [00:01<00:00, 36.62batch/s]
epoch: 47:   8%|▊         | 4/50 [00:00<00:01, 39.92batch/s]

loss training: 0.124550
accuracy train: 0.833333 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 47: 100%|██████████| 50/50 [00:01<00:00, 38.61batch/s]
epoch: 48:  10%|█         | 5/50 [00:00<00:01, 40.67batch/s]

loss training: 0.140159
accuracy train: 0.952381 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 48: 100%|██████████| 50/50 [00:01<00:00, 38.72batch/s]
epoch: 49:   8%|▊         | 4/50 [00:00<00:01, 38.88batch/s]

loss training: 0.109478
accuracy train: 0.958333 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 49: 100%|██████████| 50/50 [00:01<00:00, 38.87batch/s]
epoch: 50:   8%|▊         | 4/50 [00:00<00:01, 38.16batch/s]

loss training: 0.085016
accuracy train: 0.970238 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 50: 100%|██████████| 50/50 [00:01<00:00, 37.29batch/s]
epoch: 51:  10%|█         | 5/50 [00:00<00:01, 40.27batch/s]

loss training: 0.069675
accuracy train: 0.988095 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 51: 100%|██████████| 50/50 [00:01<00:00, 38.46batch/s]
epoch: 52:   8%|▊         | 4/50 [00:00<00:01, 38.77batch/s]

loss training: 0.076335
accuracy train: 0.970238 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 52: 100%|██████████| 50/50 [00:01<00:00, 37.54batch/s]
epoch: 53:   8%|▊         | 4/50 [00:00<00:01, 39.91batch/s]

loss training: 0.064222
accuracy train: 0.982143 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 53: 100%|██████████| 50/50 [00:01<00:00, 39.07batch/s]
epoch: 54:  10%|█         | 5/50 [00:00<00:01, 40.80batch/s]

loss training: 0.091062
accuracy train: 0.982143 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 54: 100%|██████████| 50/50 [00:01<00:00, 38.84batch/s]
epoch: 55:  10%|█         | 5/50 [00:00<00:01, 40.27batch/s]

loss training: 0.078817
accuracy train: 0.988095 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 55: 100%|██████████| 50/50 [00:01<00:00, 38.08batch/s]
epoch: 56:  10%|█         | 5/50 [00:00<00:01, 40.36batch/s]

loss training: 0.072572
accuracy train: 0.982143 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 56: 100%|██████████| 50/50 [00:01<00:00, 38.12batch/s]
epoch: 57:   8%|▊         | 4/50 [00:00<00:01, 32.83batch/s]

loss training: 0.083882
accuracy train: 0.988095 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 57: 100%|██████████| 50/50 [00:01<00:00, 37.66batch/s]
epoch: 58:  10%|█         | 5/50 [00:00<00:01, 40.22batch/s]

loss training: 0.066287
accuracy train: 0.988095 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 58: 100%|██████████| 50/50 [00:01<00:00, 39.18batch/s]
epoch: 59:  10%|█         | 5/50 [00:00<00:01, 40.66batch/s]

loss training: 0.082685
accuracy train: 0.982143 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 59: 100%|██████████| 50/50 [00:01<00:00, 39.26batch/s]
epoch: 60:  10%|█         | 5/50 [00:00<00:01, 34.50batch/s]

loss training: 0.072567
accuracy train: 0.958333 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 60: 100%|██████████| 50/50 [00:01<00:00, 36.46batch/s]
epoch: 61:  10%|█         | 5/50 [00:00<00:01, 40.86batch/s]

loss training: 0.092711
accuracy train: 0.982143 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 61: 100%|██████████| 50/50 [00:01<00:00, 37.67batch/s]
epoch: 62:   8%|▊         | 4/50 [00:00<00:01, 38.28batch/s]

loss training: 0.046870
accuracy train: 0.988095 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 62: 100%|██████████| 50/50 [00:01<00:00, 38.83batch/s]
epoch: 63:  10%|█         | 5/50 [00:00<00:01, 40.60batch/s]

loss training: 0.052248
accuracy train: 0.988095 test: 0.700000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 63: 100%|██████████| 50/50 [00:01<00:00, 38.70batch/s]
epoch: 64:  10%|█         | 5/50 [00:00<00:01, 40.76batch/s]

loss training: 0.054875
accuracy train: 0.994048 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 64: 100%|██████████| 50/50 [00:01<00:00, 38.51batch/s]
epoch: 65:   8%|▊         | 4/50 [00:00<00:01, 39.89batch/s]

loss training: 0.062367
accuracy train: 0.982143 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 65: 100%|██████████| 50/50 [00:01<00:00, 38.24batch/s]
epoch: 66:  10%|█         | 5/50 [00:00<00:01, 40.97batch/s]

loss training: 0.077682
accuracy train: 0.988095 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 66: 100%|██████████| 50/50 [00:01<00:00, 38.49batch/s]
epoch: 67:   8%|▊         | 4/50 [00:00<00:01, 37.95batch/s]

loss training: 0.070135
accuracy train: 0.994048 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 67: 100%|██████████| 50/50 [00:01<00:00, 38.08batch/s]
epoch: 68:   8%|▊         | 4/50 [00:00<00:01, 33.22batch/s]

loss training: 0.056177
accuracy train: 0.994048 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 68: 100%|██████████| 50/50 [00:01<00:00, 37.24batch/s]
epoch: 69:   8%|▊         | 4/50 [00:00<00:01, 39.93batch/s]

loss training: 0.052274
accuracy train: 0.988095 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 69: 100%|██████████| 50/50 [00:01<00:00, 38.20batch/s]
epoch: 70:  10%|█         | 5/50 [00:00<00:01, 40.41batch/s]

loss training: 0.061231
accuracy train: 0.976190 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 70: 100%|██████████| 50/50 [00:01<00:00, 37.69batch/s]
epoch: 71:  10%|█         | 5/50 [00:00<00:01, 40.55batch/s]

loss training: 0.060182
accuracy train: 0.982143 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 71: 100%|██████████| 50/50 [00:01<00:00, 39.43batch/s]
epoch: 72:  10%|█         | 5/50 [00:00<00:01, 40.45batch/s]

loss training: 0.175101
accuracy train: 0.988095 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 72: 100%|██████████| 50/50 [00:01<00:00, 37.58batch/s]
epoch: 73:  10%|█         | 5/50 [00:00<00:01, 40.24batch/s]

loss training: 0.156580
accuracy train: 0.886905 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 73: 100%|██████████| 50/50 [00:01<00:00, 39.25batch/s]
epoch: 74:  10%|█         | 5/50 [00:00<00:01, 40.88batch/s]

loss training: 0.152074
accuracy train: 0.976190 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 74: 100%|██████████| 50/50 [00:01<00:00, 36.23batch/s]
epoch: 75:  10%|█         | 5/50 [00:00<00:01, 40.56batch/s]

loss training: 0.061354
accuracy train: 0.988095 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 75: 100%|██████████| 50/50 [00:01<00:00, 38.19batch/s]
epoch: 76:   8%|▊         | 4/50 [00:00<00:01, 39.03batch/s]

loss training: 0.068213
accuracy train: 0.988095 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 76: 100%|██████████| 50/50 [00:01<00:00, 38.51batch/s]
epoch: 77:   8%|▊         | 4/50 [00:00<00:01, 38.95batch/s]

loss training: 0.072440
accuracy train: 0.964286 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 77: 100%|██████████| 50/50 [00:01<00:00, 35.03batch/s]
epoch: 78:   8%|▊         | 4/50 [00:00<00:01, 36.91batch/s]

loss training: 0.058838
accuracy train: 0.988095 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 78: 100%|██████████| 50/50 [00:01<00:00, 38.15batch/s]
epoch: 79:   8%|▊         | 4/50 [00:00<00:01, 39.23batch/s]

loss training: 0.059331
accuracy train: 0.988095 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 79: 100%|██████████| 50/50 [00:01<00:00, 38.12batch/s]
epoch: 80:  10%|█         | 5/50 [00:00<00:01, 39.83batch/s]

loss training: 0.052702
accuracy train: 0.976190 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 80: 100%|██████████| 50/50 [00:01<00:00, 33.68batch/s]
epoch: 81:  10%|█         | 5/50 [00:00<00:01, 39.68batch/s]

loss training: 0.038681
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 81: 100%|██████████| 50/50 [00:01<00:00, 38.69batch/s]
epoch: 82:   6%|▌         | 3/50 [00:00<00:01, 25.97batch/s]

loss training: 0.050047
accuracy train: 0.988095 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 82: 100%|██████████| 50/50 [00:01<00:00, 37.09batch/s]
epoch: 83:   8%|▊         | 4/50 [00:00<00:01, 39.86batch/s]

loss training: 0.081172
accuracy train: 1.000000 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 83: 100%|██████████| 50/50 [00:01<00:00, 38.27batch/s]
epoch: 84:  10%|█         | 5/50 [00:00<00:01, 40.09batch/s]

loss training: 0.077225
accuracy train: 0.988095 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 84: 100%|██████████| 50/50 [00:01<00:00, 38.06batch/s]
epoch: 85:  10%|█         | 5/50 [00:00<00:01, 37.60batch/s]

loss training: 0.047108
accuracy train: 0.982143 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 85: 100%|██████████| 50/50 [00:01<00:00, 37.29batch/s]
epoch: 86:   8%|▊         | 4/50 [00:00<00:01, 39.71batch/s]

loss training: 0.031501
accuracy train: 0.994048 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 86: 100%|██████████| 50/50 [00:01<00:00, 38.69batch/s]
epoch: 87:  10%|█         | 5/50 [00:00<00:01, 40.45batch/s]

loss training: 0.080605
accuracy train: 0.994048 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 87: 100%|██████████| 50/50 [00:01<00:00, 39.41batch/s]
epoch: 88:   8%|▊         | 4/50 [00:00<00:01, 37.84batch/s]

loss training: 0.051901
accuracy train: 0.970238 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 88: 100%|██████████| 50/50 [00:01<00:00, 38.35batch/s]
epoch: 89:   8%|▊         | 4/50 [00:00<00:01, 38.43batch/s]

loss training: 0.125937
accuracy train: 0.928571 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 89: 100%|██████████| 50/50 [00:01<00:00, 37.94batch/s]
epoch: 90:  10%|█         | 5/50 [00:00<00:01, 40.36batch/s]

loss training: 0.280548
accuracy train: 0.970238 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 90: 100%|██████████| 50/50 [00:01<00:00, 38.83batch/s]
epoch: 91:  10%|█         | 5/50 [00:00<00:01, 40.92batch/s]

loss training: 0.135505
accuracy train: 0.994048 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 91: 100%|██████████| 50/50 [00:01<00:00, 39.42batch/s]
epoch: 92:  10%|█         | 5/50 [00:00<00:01, 40.04batch/s]

loss training: 0.090915
accuracy train: 0.982143 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 92: 100%|██████████| 50/50 [00:01<00:00, 37.90batch/s]
epoch: 93:   8%|▊         | 4/50 [00:00<00:01, 38.98batch/s]

loss training: 0.077573
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 93: 100%|██████████| 50/50 [00:01<00:00, 37.75batch/s]
epoch: 94:   8%|▊         | 4/50 [00:00<00:01, 38.66batch/s]

loss training: 0.088945
accuracy train: 0.988095 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 94: 100%|██████████| 50/50 [00:01<00:00, 38.30batch/s]
epoch: 95:  10%|█         | 5/50 [00:00<00:01, 40.37batch/s]

loss training: 0.079907
accuracy train: 0.994048 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 95: 100%|██████████| 50/50 [00:01<00:00, 39.01batch/s]
epoch: 96:  10%|█         | 5/50 [00:00<00:01, 40.77batch/s]

loss training: 0.077677
accuracy train: 1.000000 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 96: 100%|██████████| 50/50 [00:01<00:00, 38.01batch/s]
epoch: 97:  10%|█         | 5/50 [00:00<00:01, 41.05batch/s]

loss training: 0.045901
accuracy train: 0.994048 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 97: 100%|██████████| 50/50 [00:01<00:00, 39.76batch/s]
epoch: 98:  10%|█         | 5/50 [00:00<00:01, 40.61batch/s]

loss training: 0.046276
accuracy train: 1.000000 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 98: 100%|██████████| 50/50 [00:01<00:00, 37.69batch/s]
epoch: 99:  10%|█         | 5/50 [00:00<00:01, 40.43batch/s]

loss training: 0.075300
accuracy train: 0.976190 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 99: 100%|██████████| 50/50 [00:01<00:00, 38.35batch/s]
epoch: 100:  10%|█         | 5/50 [00:00<00:01, 40.23batch/s]

loss training: 0.169046
accuracy train: 0.904762 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 100: 100%|██████████| 50/50 [00:01<00:00, 38.44batch/s]
epoch: 101:  10%|█         | 5/50 [00:00<00:01, 40.79batch/s]

loss training: 0.164344
accuracy train: 0.952381 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 101: 100%|██████████| 50/50 [00:01<00:00, 37.88batch/s]
epoch: 102:  10%|█         | 5/50 [00:00<00:01, 40.14batch/s]

loss training: 0.088448
accuracy train: 0.994048 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 102: 100%|██████████| 50/50 [00:01<00:00, 39.33batch/s]
epoch: 103:  10%|█         | 5/50 [00:00<00:01, 40.83batch/s]

loss training: 0.056343
accuracy train: 0.988095 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 103: 100%|██████████| 50/50 [00:01<00:00, 38.60batch/s]
epoch: 104:  10%|█         | 5/50 [00:00<00:01, 41.01batch/s]

loss training: 0.052435
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 104: 100%|██████████| 50/50 [00:01<00:00, 39.18batch/s]
epoch: 105:   8%|▊         | 4/50 [00:00<00:01, 37.32batch/s]

loss training: 0.034811
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 105: 100%|██████████| 50/50 [00:01<00:00, 38.74batch/s]
epoch: 106:  10%|█         | 5/50 [00:00<00:01, 40.80batch/s]

loss training: 0.029655
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 106: 100%|██████████| 50/50 [00:01<00:00, 38.90batch/s]
epoch: 107:   8%|▊         | 4/50 [00:00<00:01, 39.09batch/s]

loss training: 0.030537
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 107: 100%|██████████| 50/50 [00:01<00:00, 38.06batch/s]
epoch: 108:  10%|█         | 5/50 [00:00<00:01, 40.32batch/s]

loss training: 0.036321
accuracy train: 0.994048 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 108: 100%|██████████| 50/50 [00:01<00:00, 39.14batch/s]
epoch: 109:  10%|█         | 5/50 [00:00<00:01, 39.94batch/s]

loss training: 0.039135
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 109: 100%|██████████| 50/50 [00:01<00:00, 36.92batch/s]
epoch: 110:  10%|█         | 5/50 [00:00<00:01, 39.60batch/s]

loss training: 0.024275
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 110: 100%|██████████| 50/50 [00:01<00:00, 37.62batch/s]
epoch: 111:   8%|▊         | 4/50 [00:00<00:01, 39.94batch/s]

loss training: 0.028321
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 111: 100%|██████████| 50/50 [00:01<00:00, 34.64batch/s]
epoch: 112:  10%|█         | 5/50 [00:00<00:01, 40.24batch/s]

loss training: 0.033552
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 112: 100%|██████████| 50/50 [00:01<00:00, 38.02batch/s]
epoch: 113:   8%|▊         | 4/50 [00:00<00:01, 38.85batch/s]

loss training: 0.019250
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 113: 100%|██████████| 50/50 [00:01<00:00, 38.24batch/s]
epoch: 114:   8%|▊         | 4/50 [00:00<00:01, 38.64batch/s]

loss training: 0.038007
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 114: 100%|██████████| 50/50 [00:01<00:00, 38.84batch/s]
epoch: 115:   8%|▊         | 4/50 [00:00<00:01, 40.00batch/s]

loss training: 0.028965
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 115: 100%|██████████| 50/50 [00:01<00:00, 36.86batch/s]
epoch: 116:   8%|▊         | 4/50 [00:00<00:01, 36.44batch/s]

loss training: 0.018638
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 116: 100%|██████████| 50/50 [00:01<00:00, 38.90batch/s]
epoch: 117:  10%|█         | 5/50 [00:00<00:01, 41.00batch/s]

loss training: 0.033343
accuracy train: 0.988095 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 117: 100%|██████████| 50/50 [00:01<00:00, 38.74batch/s]
epoch: 118:   8%|▊         | 4/50 [00:00<00:01, 37.08batch/s]

loss training: 0.072344
accuracy train: 0.994048 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 118: 100%|██████████| 50/50 [00:01<00:00, 38.48batch/s]
epoch: 119:  10%|█         | 5/50 [00:00<00:01, 40.25batch/s]

loss training: 0.041012
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 119: 100%|██████████| 50/50 [00:01<00:00, 39.08batch/s]
epoch: 120:   8%|▊         | 4/50 [00:00<00:01, 38.64batch/s]

loss training: 0.028381
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 120: 100%|██████████| 50/50 [00:01<00:00, 38.45batch/s]
epoch: 121:  10%|█         | 5/50 [00:00<00:01, 40.38batch/s]

loss training: 0.019449
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 121: 100%|██████████| 50/50 [00:01<00:00, 37.87batch/s]
epoch: 122:   8%|▊         | 4/50 [00:00<00:01, 39.21batch/s]

loss training: 0.013031
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 122: 100%|██████████| 50/50 [00:01<00:00, 37.93batch/s]
epoch: 123:  10%|█         | 5/50 [00:00<00:01, 40.55batch/s]

loss training: 0.020878
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 123: 100%|██████████| 50/50 [00:01<00:00, 39.40batch/s]
epoch: 124:  10%|█         | 5/50 [00:00<00:01, 40.42batch/s]

loss training: 0.017222
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 124: 100%|██████████| 50/50 [00:01<00:00, 36.78batch/s]
epoch: 125:   8%|▊         | 4/50 [00:00<00:01, 31.15batch/s]

loss training: 0.014698
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 125: 100%|██████████| 50/50 [00:01<00:00, 37.32batch/s]
epoch: 126:  10%|█         | 5/50 [00:00<00:01, 40.63batch/s]

loss training: 0.014848
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 126: 100%|██████████| 50/50 [00:01<00:00, 35.61batch/s]
epoch: 127:  10%|█         | 5/50 [00:00<00:01, 40.34batch/s]

loss training: 0.020625
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 127: 100%|██████████| 50/50 [00:01<00:00, 37.86batch/s]
epoch: 128:  10%|█         | 5/50 [00:00<00:01, 40.35batch/s]

loss training: 0.016497
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 128: 100%|██████████| 50/50 [00:01<00:00, 38.60batch/s]
epoch: 129:  10%|█         | 5/50 [00:00<00:01, 39.48batch/s]

loss training: 0.013557
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 129: 100%|██████████| 50/50 [00:01<00:00, 38.18batch/s]
epoch: 130:   8%|▊         | 4/50 [00:00<00:01, 39.17batch/s]

loss training: 0.013913
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 130: 100%|██████████| 50/50 [00:01<00:00, 36.11batch/s]
epoch: 131:   8%|▊         | 4/50 [00:00<00:01, 37.95batch/s]

loss training: 0.020426
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 131: 100%|██████████| 50/50 [00:01<00:00, 37.25batch/s]
epoch: 132:   8%|▊         | 4/50 [00:00<00:01, 39.60batch/s]

loss training: 0.013868
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 132: 100%|██████████| 50/50 [00:01<00:00, 39.16batch/s]
epoch: 133:  10%|█         | 5/50 [00:00<00:01, 40.71batch/s]

loss training: 0.022866
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 133: 100%|██████████| 50/50 [00:01<00:00, 38.81batch/s]
epoch: 134:   8%|▊         | 4/50 [00:00<00:01, 39.59batch/s]

loss training: 0.029577
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 134: 100%|██████████| 50/50 [00:01<00:00, 38.21batch/s]
epoch: 135:   8%|▊         | 4/50 [00:00<00:01, 36.45batch/s]

loss training: 0.014369
accuracy train: 1.000000 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 135: 100%|██████████| 50/50 [00:01<00:00, 35.48batch/s]
epoch: 136:  10%|█         | 5/50 [00:00<00:01, 40.62batch/s]

loss training: 0.025642
accuracy train: 1.000000 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 136: 100%|██████████| 50/50 [00:01<00:00, 38.91batch/s]
epoch: 137:  10%|█         | 5/50 [00:00<00:01, 40.45batch/s]

loss training: 0.016841
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 137: 100%|██████████| 50/50 [00:01<00:00, 38.16batch/s]
epoch: 138:   8%|▊         | 4/50 [00:00<00:01, 38.96batch/s]

loss training: 0.018987
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 138: 100%|██████████| 50/50 [00:01<00:00, 37.96batch/s]
epoch: 139:   8%|▊         | 4/50 [00:00<00:01, 38.03batch/s]

loss training: 0.014275
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 139: 100%|██████████| 50/50 [00:01<00:00, 37.82batch/s]
epoch: 140:  10%|█         | 5/50 [00:00<00:01, 40.40batch/s]

loss training: 0.012035
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 140: 100%|██████████| 50/50 [00:01<00:00, 39.15batch/s]
epoch: 141:   8%|▊         | 4/50 [00:00<00:01, 38.99batch/s]

loss training: 0.018227
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 141: 100%|██████████| 50/50 [00:01<00:00, 39.43batch/s]
epoch: 142:  10%|█         | 5/50 [00:00<00:01, 40.63batch/s]

loss training: 0.026496
accuracy train: 0.982143 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 142: 100%|██████████| 50/50 [00:01<00:00, 38.45batch/s]
epoch: 143:  10%|█         | 5/50 [00:00<00:01, 40.50batch/s]

loss training: 0.027579
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 143: 100%|██████████| 50/50 [00:01<00:00, 38.81batch/s]
epoch: 144:   8%|▊         | 4/50 [00:00<00:01, 38.41batch/s]

loss training: 0.019370
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 144: 100%|██████████| 50/50 [00:01<00:00, 39.19batch/s]
epoch: 145:   8%|▊         | 4/50 [00:00<00:01, 39.97batch/s]

loss training: 0.019698
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 145: 100%|██████████| 50/50 [00:01<00:00, 39.34batch/s]
epoch: 146:   6%|▌         | 3/50 [00:00<00:01, 28.73batch/s]

loss training: 0.028139
accuracy train: 0.994048 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 146: 100%|██████████| 50/50 [00:01<00:00, 37.75batch/s]
epoch: 147:  10%|█         | 5/50 [00:00<00:01, 40.36batch/s]

loss training: 0.024623
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 147: 100%|██████████| 50/50 [00:01<00:00, 37.85batch/s]
epoch: 148:   8%|▊         | 4/50 [00:00<00:01, 39.22batch/s]

loss training: 0.014560
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 148: 100%|██████████| 50/50 [00:01<00:00, 38.51batch/s]
epoch: 149:   8%|▊         | 4/50 [00:00<00:01, 36.56batch/s]

loss training: 0.011033
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 149: 100%|██████████| 50/50 [00:01<00:00, 37.91batch/s]
epoch: 150:   8%|▊         | 4/50 [00:00<00:01, 39.87batch/s]

loss training: 0.015508
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 150: 100%|██████████| 50/50 [00:01<00:00, 37.80batch/s]
epoch: 151:   8%|▊         | 4/50 [00:00<00:01, 38.37batch/s]

loss training: 0.015506
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 151: 100%|██████████| 50/50 [00:01<00:00, 34.68batch/s]
epoch: 152:   8%|▊         | 4/50 [00:00<00:01, 39.20batch/s]

loss training: 0.017861
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 152: 100%|██████████| 50/50 [00:01<00:00, 30.30batch/s]
epoch: 153:   8%|▊         | 4/50 [00:00<00:01, 35.36batch/s]

loss training: 0.014073
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 153: 100%|██████████| 50/50 [00:01<00:00, 32.49batch/s]
epoch: 154:   0%|          | 0/50 [00:00<?, ?batch/s]

loss training: 0.010198
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 154: 100%|██████████| 50/50 [00:01<00:00, 37.06batch/s]
epoch: 155:   8%|▊         | 4/50 [00:00<00:01, 37.29batch/s]

loss training: 0.013984
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 155: 100%|██████████| 50/50 [00:01<00:00, 33.66batch/s]
epoch: 156:   8%|▊         | 4/50 [00:00<00:01, 39.47batch/s]

loss training: 0.011167
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 156: 100%|██████████| 50/50 [00:01<00:00, 38.42batch/s]
epoch: 157:  10%|█         | 5/50 [00:00<00:01, 40.82batch/s]

loss training: 0.010429
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 157: 100%|██████████| 50/50 [00:01<00:00, 33.70batch/s]
epoch: 158:   8%|▊         | 4/50 [00:00<00:01, 37.17batch/s]

loss training: 0.014619
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 158: 100%|██████████| 50/50 [00:01<00:00, 36.92batch/s]
epoch: 159:   8%|▊         | 4/50 [00:00<00:01, 38.15batch/s]

loss training: 0.015434
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 159: 100%|██████████| 50/50 [00:01<00:00, 36.33batch/s]
epoch: 160:   6%|▌         | 3/50 [00:00<00:01, 30.00batch/s]

loss training: 0.014422
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 160: 100%|██████████| 50/50 [00:01<00:00, 36.76batch/s]
epoch: 161:   8%|▊         | 4/50 [00:00<00:01, 39.75batch/s]

loss training: 0.014564
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 161: 100%|██████████| 50/50 [00:01<00:00, 38.01batch/s]
epoch: 162:   8%|▊         | 4/50 [00:00<00:01, 36.64batch/s]

loss training: 0.016874
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 162: 100%|██████████| 50/50 [00:01<00:00, 37.91batch/s]
epoch: 163:   8%|▊         | 4/50 [00:00<00:01, 39.30batch/s]

loss training: 0.012754
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 163: 100%|██████████| 50/50 [00:01<00:00, 37.49batch/s]
epoch: 164:   8%|▊         | 4/50 [00:00<00:01, 39.92batch/s]

loss training: 0.016765
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 164: 100%|██████████| 50/50 [00:01<00:00, 32.82batch/s]
epoch: 165:   8%|▊         | 4/50 [00:00<00:01, 38.37batch/s]

loss training: 0.007692
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 165: 100%|██████████| 50/50 [00:01<00:00, 37.47batch/s]
epoch: 166:   8%|▊         | 4/50 [00:00<00:01, 36.90batch/s]

loss training: 0.009581
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 166: 100%|██████████| 50/50 [00:01<00:00, 38.61batch/s]
epoch: 167:  10%|█         | 5/50 [00:00<00:01, 39.68batch/s]

loss training: 0.036596
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 167: 100%|██████████| 50/50 [00:01<00:00, 36.64batch/s]
epoch: 168:   8%|▊         | 4/50 [00:00<00:01, 38.89batch/s]

loss training: 0.011263
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 168: 100%|██████████| 50/50 [00:01<00:00, 38.43batch/s]
epoch: 169:   8%|▊         | 4/50 [00:00<00:01, 39.08batch/s]

loss training: 0.021208
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 169: 100%|██████████| 50/50 [00:01<00:00, 38.39batch/s]
epoch: 170:  10%|█         | 5/50 [00:00<00:01, 40.73batch/s]

loss training: 0.020400
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 170: 100%|██████████| 50/50 [00:01<00:00, 39.28batch/s]
epoch: 171:   8%|▊         | 4/50 [00:00<00:01, 38.37batch/s]

loss training: 0.013511
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 171: 100%|██████████| 50/50 [00:01<00:00, 38.82batch/s]
epoch: 172:  10%|█         | 5/50 [00:00<00:01, 40.24batch/s]

loss training: 0.009809
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 172: 100%|██████████| 50/50 [00:01<00:00, 39.14batch/s]
epoch: 173:   8%|▊         | 4/50 [00:00<00:01, 36.51batch/s]

loss training: 0.013022
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 173: 100%|██████████| 50/50 [00:01<00:00, 39.12batch/s]
epoch: 174:  10%|█         | 5/50 [00:00<00:01, 40.60batch/s]

loss training: 0.009213
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 174: 100%|██████████| 50/50 [00:01<00:00, 39.42batch/s]
epoch: 175:   8%|▊         | 4/50 [00:00<00:01, 39.88batch/s]

loss training: 0.013224
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 175: 100%|██████████| 50/50 [00:01<00:00, 37.70batch/s]
epoch: 176:  10%|█         | 5/50 [00:00<00:01, 40.66batch/s]

loss training: 0.012001
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 176: 100%|██████████| 50/50 [00:01<00:00, 38.38batch/s]
epoch: 177:   8%|▊         | 4/50 [00:00<00:01, 36.72batch/s]

loss training: 0.013906
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 177: 100%|██████████| 50/50 [00:01<00:00, 37.10batch/s]
epoch: 178:   8%|▊         | 4/50 [00:00<00:01, 35.97batch/s]

loss training: 0.012983
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 178: 100%|██████████| 50/50 [00:01<00:00, 37.72batch/s]
epoch: 179:  10%|█         | 5/50 [00:00<00:01, 40.71batch/s]

loss training: 0.014577
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 179: 100%|██████████| 50/50 [00:01<00:00, 34.46batch/s]
epoch: 180:   8%|▊         | 4/50 [00:00<00:01, 31.36batch/s]

loss training: 0.009590
accuracy train: 1.000000 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 180: 100%|██████████| 50/50 [00:01<00:00, 33.37batch/s]
epoch: 181:   8%|▊         | 4/50 [00:00<00:01, 38.48batch/s]

loss training: 0.009010
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 181: 100%|██████████| 50/50 [00:01<00:00, 35.68batch/s]
epoch: 182:   8%|▊         | 4/50 [00:00<00:01, 38.28batch/s]

loss training: 0.007700
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 182: 100%|██████████| 50/50 [00:01<00:00, 37.76batch/s]
epoch: 183:   8%|▊         | 4/50 [00:00<00:01, 38.61batch/s]

loss training: 0.017132
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 183: 100%|██████████| 50/50 [00:01<00:00, 39.02batch/s]
epoch: 184:   8%|▊         | 4/50 [00:00<00:01, 37.39batch/s]

loss training: 0.013991
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 184: 100%|██████████| 50/50 [00:01<00:00, 36.86batch/s]
epoch: 185:   8%|▊         | 4/50 [00:00<00:01, 37.04batch/s]

loss training: 0.020508
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 185: 100%|██████████| 50/50 [00:01<00:00, 37.69batch/s]
epoch: 186:   8%|▊         | 4/50 [00:00<00:01, 30.03batch/s]

loss training: 0.022877
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 186: 100%|██████████| 50/50 [00:01<00:00, 38.30batch/s]
epoch: 187:  10%|█         | 5/50 [00:00<00:01, 38.91batch/s]

loss training: 0.012604
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 187: 100%|██████████| 50/50 [00:01<00:00, 37.34batch/s]
epoch: 188:   8%|▊         | 4/50 [00:00<00:01, 36.95batch/s]

loss training: 0.007742
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 188: 100%|██████████| 50/50 [00:01<00:00, 38.26batch/s]
epoch: 189:   8%|▊         | 4/50 [00:00<00:01, 36.22batch/s]

loss training: 0.020192
accuracy train: 1.000000 test: 0.850000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 189: 100%|██████████| 50/50 [00:01<00:00, 30.19batch/s]
epoch: 190:  10%|█         | 5/50 [00:00<00:01, 40.21batch/s]

loss training: 0.050703
accuracy train: 1.000000 test: 0.750000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 190: 100%|██████████| 50/50 [00:01<00:00, 36.83batch/s]
epoch: 191:   8%|▊         | 4/50 [00:00<00:01, 39.82batch/s]

loss training: 0.011705
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 191: 100%|██████████| 50/50 [00:01<00:00, 38.66batch/s]
epoch: 192:  10%|█         | 5/50 [00:00<00:01, 40.30batch/s]

loss training: 0.021786
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 192: 100%|██████████| 50/50 [00:01<00:00, 38.31batch/s]
epoch: 193:   8%|▊         | 4/50 [00:00<00:01, 37.85batch/s]

loss training: 0.013971
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 193: 100%|██████████| 50/50 [00:01<00:00, 36.12batch/s]
epoch: 194:   8%|▊         | 4/50 [00:00<00:01, 39.22batch/s]

loss training: 0.008983
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 194: 100%|██████████| 50/50 [00:01<00:00, 36.66batch/s]
epoch: 195:   8%|▊         | 4/50 [00:00<00:01, 39.90batch/s]

loss training: 0.008459
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 195: 100%|██████████| 50/50 [00:01<00:00, 36.02batch/s]
epoch: 196:   8%|▊         | 4/50 [00:00<00:01, 36.93batch/s]

loss training: 0.016044
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 196: 100%|██████████| 50/50 [00:01<00:00, 38.34batch/s]
epoch: 197:  10%|█         | 5/50 [00:00<00:01, 40.74batch/s]

loss training: 0.012760
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 197: 100%|██████████| 50/50 [00:01<00:00, 37.70batch/s]
epoch: 198:   8%|▊         | 4/50 [00:00<00:01, 39.02batch/s]

loss training: 0.007777
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 198: 100%|██████████| 50/50 [00:01<00:00, 37.27batch/s]
epoch: 199:   8%|▊         | 4/50 [00:00<00:01, 38.43batch/s]

loss training: 0.009890
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 199: 100%|██████████| 50/50 [00:01<00:00, 38.46batch/s]
epoch: 200:   8%|▊         | 4/50 [00:00<00:01, 38.02batch/s]

loss training: 0.015970
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 200: 100%|██████████| 50/50 [00:01<00:00, 35.68batch/s]
epoch: 201:  10%|█         | 5/50 [00:00<00:01, 40.64batch/s]

loss training: 0.014226
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 201: 100%|██████████| 50/50 [00:01<00:00, 38.57batch/s]
epoch: 202:   8%|▊         | 4/50 [00:00<00:01, 38.90batch/s]

loss training: 0.011781
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 202: 100%|██████████| 50/50 [00:01<00:00, 38.15batch/s]
epoch: 203:  10%|█         | 5/50 [00:00<00:01, 40.22batch/s]

loss training: 0.013049
accuracy train: 1.000000 test: 0.800000

Parameter containing:
tensor([0., 0., 0., 0.], device='cuda:0', requires_grad=True)


epoch: 203:  74%|███████▍  | 37/50 [00:00<00:00, 39.01batch/s]

KeyboardInterrupt: 