In [31]:
import os
import dgl
import torch
import pickle
import datetime

import numpy as np
import pandas as pd

from itertools import chain
from dgl.data import DGLDataset

# Step 1: Create dataset

In [32]:
class CB12Dataset(DGLDataset):
    """
    CB12 resume dataset for node classification
    
    
    Dataset statistics:
    
    - Nodes: 
    - Node features: 
    - Edges: 
    - Edge Weights:
    - Number of Classes: 
    
    Attributes
    ----------
    num_classes : int
        Number of node classes
    data : list
        A list of :class:`dgl.DGLGraph` objects
    
    """
    
    def __init__(self):
        super(CB12Dataset, self).__init__(name='CB12')
    
    def process(self):
        
        # Node data
        
        ## title
        title_ids = []
        title_names = []
        f_title = open("../data/cb12/graph/id_title.txt", encoding="ISO-8859-1")
        while True:
            id_title = f_title.readline()
            if not id_title:
                break
                
            id_title = id_title.strip().split('\t')
            title_identity = int(id_title[0])
            title_ids.append(title_identity)
            title_name = id_title[1]
            title_names.append(title_name)
        
        f_title.close()
        print("Unique titles: {}".format(len(title_ids)))
        print('\n')
        
        
        ## tag
        tag_ids = []
        tag_names = []
        f_tag = open("../data/cb12/graph/id_tag.txt", encoding="ISO-8859-1")
        while True:
            id_tag = f_tag.readline()
            if not id_tag:
                break
            
            id_tag = id_tag.strip().split('\t')
            tag_identity = int(id_tag[0])
            tag_ids.append(tag_identity)
            tag_name = id_tag[1]
            tag_names.append(tag_name)
        f_tag.close()
        print("Unique tags: {}".format(len(tag_ids)))
        print('\n')
        
        title_ids_invmap = {x: i for i, x in enumerate(title_ids)}
        tag_ids_invmap = {x: i for i, x in enumerate(tag_ids)}
        
      
        
        # Title feature
        token_idx = []
        f_title_feature = open("../data/cb12/graph/title_feature.txt", "r")
        for title_feature in f_title_feature:
            title_feature = title_feature.split('\t')
            identity = int(title_feature[0])
            feature = title_feature[1]
            token_idx.append(feature)
        all_tokens = chain.from_iterable([eval(item) for item in token_idx])
        vocab_size = len(set(all_tokens))
        print('Vocab size: ', vocab_size)
        f_title_feature.close()
        
        node_features = [[0 for _ in range(vocab_size)] for _ in range(len(title_ids_invmap))]
        f_title_feature = open("../data/cb12/graph/title_feature.txt", "r")
        for title_feature in f_title_feature:
            title_feature = title_feature.split('\t')
            identity = int(title_feature[0])
            feature = title_feature[1]
            title_id = title_ids_invmap[identity]
            for i in eval(feature):
                node_features[title_id][i] = 1
        node_features = torch.from_numpy(np.array(node_features)).float()
        print(node_features.shape)
        f_title_feature.close()
        
        # Title label 
        all_labels = []
        f_title_label = open("../data/cb12/graph/title_label.txt", "r")
        for title_label in f_title_label:
            title_label = title_label.split('\t')
            identity = int(title_label[0])
            # MinorGroup, MajorGroup
            label = title_label[2]
            all_labels.append(label)
        self.all_labels = all_labels
        label_to_id = {label: idx for idx, label in enumerate(set(self.all_labels))}
        print('Number of labels: ', len(label_to_id))
        f_title_label.close()
        
        node_labels = np.zeros(len(title_ids_invmap), dtype=np.int64)
        f_title_label = open("../data/cb12/graph/title_label.txt", "r")
        for title_label in f_title_label:
            title_label = title_label.split('\t')
            identity = int(title_label[0])
            label = title_label[2]
            title_id = title_ids_invmap[identity]
            label_id = label_to_id[label]
            node_labels[title_id] = label_id
        node_labels = torch.LongTensor(node_labels)
        f_title_label.close()
    
        
        
        # Edge data
        
        ## title-title
        title_title_src = []
        title_title_dst = []
        df_title_title = pd.read_csv("../data/cb12/graph/title_title_transition_MinorGroup200_enhanced.csv", "\t")
        for idx, row in df_title_title.iterrows():
            title_title_src.append(title_ids_invmap[row['Src']])
            title_title_dst.append(title_ids_invmap[row['Dst']])

        print('Unique title src: {}'.format(len(set(title_title_src))))
        print('Unique title dst: {}'.format(len(set(title_title_dst))))
        print('\n')

        
        ## title-tag
        title_tag_src = []
        title_tag_dst = []
        f_title_tag = open("../data/cb12/graph/title_tag.txt", "r")
        for title_tag in f_title_tag:
            title_tag = title_tag.split('\t')
            title = int(title_tag[0])
            tag = int(title_tag[1].strip('\n'))
            title_tag_src.append(title_ids_invmap[title])
            title_tag_dst.append(tag_ids_invmap[tag])
        f_title_tag.close()
        print('Unique title: {}'.format(len(set(title_tag_src))))
        print('Unique tag: {}'.format(len(set(title_tag_dst))))
        print('\n')


       
        
        heterG = dgl.heterograph({
            ('title', 'include', 'tag') : (title_tag_src, title_tag_dst),
            ('tag', 'is_included', 'title') : (title_tag_dst, title_tag_src),
            ('title', 'tt', 'title') : (title_title_src, title_title_dst),
         })
        
        self.graph = heterG
        self.graph.nodes['title'].data['feature'] = node_features
        self.graph.nodes['title'].data['label'] = node_labels
        
        
        n_nodes = heterG.number_of_nodes('title')
        n_train = int(n_nodes * 0.6)
        n_val = int(n_nodes * 0.2)
        train_mask = torch.zeros(n_nodes, dtype=torch.bool)
        val_mask = torch.zeros(n_nodes, dtype=torch.bool)
        test_mask = torch.zeros(n_nodes, dtype=torch.bool)
        train_mask[:n_train] = True
        val_mask[n_train:n_train + n_val] = True
        test_mask[n_train + n_val:] = True
        self.graph.nodes['title'].data['train_mask'] = train_mask
        self.graph.nodes['title'].data['val_mask'] = val_mask
        self.graph.nodes['title'].data['test_mask'] = test_mask

    

    def __getitem__(self, idx):
        """
        Get graph object
        
        Parameters
        ----------
        idx : int
            Item index
        Returns
        -------
        :class:`dgl.DGLGraph`
            graph structure and labels.
            - ``ndata['label']``: ground truth labelsv
        """
        assert idx == 0, "This dataset has only one graph"
        return self.graph
    
    def __len__(self):
        r"""The number of graphs in the dataset."""
        return 1   
    
    

    @property
    def data(self):
        return self._data
   

    @property
    def num_classes(self):
        """Number of classes."""
        print("Number of classes: {}".format(len(set(self.all_labels))))
        return len(set(self.all_labels))
        
        

In [33]:
dataset_cb12 = CB12Dataset()
graph_cb12 = dataset_cb12[0]

Unique titles: 9216


Unique tags: 200


Vocab size:  1682
torch.Size([9216, 1682])
Number of labels:  16
Unique title src: 9216
Unique title dst: 9216


Unique title: 8799
Unique tag: 200




# Step 2: Training

In [34]:
import torch.nn as nn
import dgl.function as fn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

In [35]:
def compute_metrics(logits, labels):
    _, indices = torch.max(logits, dim=1)
    preds = indices.long().cpu().numpy()
    labels = labels.cpu().numpy()
    
    acc = accuracy_score(labels, preds)
    macro_f1 = f1_score(labels, preds, average='macro')
    micro_f1 = f1_score(labels, preds, average='micro')
    weighted_f1 = f1_score(labels, preds, average='weighted')
    return acc, macro_f1, micro_f1, weighted_f1


def evaluate(model, g, features, labels, mask, loss_fn):
    model.eval()
    with torch.no_grad():
        logits = model(g, features)
        logits = logits[mask]
        labels = labels[mask]
    loss = loss_fn(logits, labels.long())
    acc, macro_f1, micro_f1, weighted_f1 = compute_metrics(logits, labels)
    return loss, acc, macro_f1, micro_f1, weighted_f1

In [51]:
class EarlyStopping:
    def __init__(self, patience=10):
        dt = datetime.datetime.now()
        self.filename = 'early_stop_{}_{:02d}-{:02d}-{:02d}.pth'.format(dt.date(), dt.hour, dt.minute, dt.second)
        
        self.patience = patience
        self.counter = 0
        self.best_loss = None
        self.best_score = None
        self.early_stop = False
        
    def save_checkpoint(self, model, model_name):
        """
        Save model when validation loss decrease
        """
        dirs = os.path.join('../checkpoints/' + model_name)
        if not os.path.exists(dirs):
            os.makedirs(dirs)
        torch.save(model.state_dict(), os.path.join(dirs + '/' + self.filename))
    
    
    def load_checkpoint(self, model, model_name):
        """
        Load the latest checkpoint
        """
        model.load_state_dict(torch.load(os.path.join('../checkpoints/' + model_name + '/' + self.filename)))
    
                   
    
    def step(self, model, model_name, loss, acc):
        score = acc
        if self.best_score is None:
            self.best_score = score
            self.best_loss = loss
            self.save_checkpoint(model, model_name)
            
        elif (loss > self.best_loss) and (acc < self.best_score):
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        
        else:
            if (loss <= self.best_loss) and (acc >= self.best_score):
                self.save_checkpoint(model, model_name)
                
            self.best_score = np.max((acc, self.best_score))
            self.best_loss = np.min((loss, self.best_loss))
            self.counter = 0
        
        return self.early_stop

    

def train(g, model, model_name, lr, weight_decay, epoch):
    stopper = EarlyStopping(patience=100)
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    best_val_acc = 0
    best_test_acc = 0
    
    best_val_macro_f1 = 0
    best_test_macro_f1 = 0

    features = g.nodes['title'].data['feature']
    labels = g.nodes['title'].data['label']
    
    train_mask = g.nodes['title'].data['train_mask']
    val_mask = g.nodes['title'].data['val_mask']
    test_mask = g.nodes['title'].data['test_mask']
   
    
    list_train_loss = []
    
    list_train_acc = []
    list_val_acc = []
    list_test_acc = []
    
    list_train_macro_f1 = []
    list_val_macro_f1  = []
    list_test_macro_f1  = []
    
    list_train_micro_f1 = []
    list_val_micro_f1  = []
    list_test_micro_f1  = []
    
    list_train_weighted_f1 = []
    list_val_weighted_f1  = []
    list_test_weighted_f1  = []
    
    
    for e in range(epoch):
        # Forward
        logits = model(g, features)

        # Compute prediction
        pred = logits.argmax(1)

        # Compute loss
        # Note that you should only compute the losses of the nodes in the training set.
        loss = loss_fcn(logits[train_mask], labels[train_mask].long())
        list_train_loss.append(loss.detach().numpy())
        
        
        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        
        # Compute accuracy on training/validation/test
        train_acc, train_macro_f1, train_micro_f1, train_weighted_f1 = compute_metrics(logits[train_mask], labels[train_mask])
        list_train_acc.append(train_acc)
        list_train_macro_f1.append(train_macro_f1)
        list_train_micro_f1.append(train_micro_f1)
        list_train_weighted_f1.append(train_weighted_f1)
        
        
        
        val_loss, val_acc, val_macro_f1, val_micro_f1, val_weighted_f1 = evaluate(model, g, features, labels, val_mask, loss_fcn)
        list_val_acc.append(val_acc)
        list_val_macro_f1.append(val_macro_f1)
        list_val_micro_f1.append(val_micro_f1)
        list_val_weighted_f1.append(val_weighted_f1)
        
        if stopper.step(model, model_name, val_loss, val_acc):
            break
        

        # Save the best validation accuracy and the corresponding test accuracy.
        if best_val_acc < val_acc:
            best_val_acc = val_acc


        if e % 100 == 0:
            print('In epoch {}, loss: {:.4f}'.format(e, loss))
            print('train acc: {:.4f}, val acc: {:.4f} (best {:.4f})'.format(train_acc, val_acc, best_val_acc))
            print('train macro_f1: {:.4f}, val macro_f1: {:.4f}'.format(train_macro_f1, val_macro_f1))
            print('train micro_f1: {:.4f}, val micro_f1: {:.4f}'.format(train_micro_f1, val_micro_f1))
            print('train weighted_f1: {:.4f}, val weighted_f1: {:.4f}'.format(train_weighted_f1, val_weighted_f1))
            print("-----------------------------")
    
    stopper.load_checkpoint(model, model_name)
    test_loss, test_acc, test_macro_f1, test_micro_f1, test_weighted_f1 = evaluate(model, g, features, labels, test_mask)
    print('test acc: {:.4f}, test macro_f1: {:.4f}, test micro_f1: {:.4f}, test weighted_f1: {:.4f}'.format(test_acc, test_macro_f1, test_micro_f1, test_weighted_f1))
            
    
    
    results =  pd.DataFrame({'loss': list_train_loss, 
                'train_acc': list_train_acc, 
                'val_acc':list_val_acc,
                'train_macro_f1': list_train_macro_f1,
                'val_macro_f1': list_val_macro_f1,
                'train_micro_f1': list_train_micro_f1,
                'val_micro_f1': list_val_micro_f1,
                'train_weighted_f1': list_train_weighted_f1,
                'val_weighted_f1': list_val_weighted_f1,
               })
    
    dirs = os.path.join('results/' + model_name)
    if not os.path.exists(dirs):
        os.makedirs(dirs)
            
    f_out = open(os.path.join(dirs + '/' + 'lr' + str(lr) +'.pkl'), 'wb')
    pickle.dump(results, f_out)

## HAN

In [52]:
from dgl.nn import GATConv

In [53]:
class SemanticAttention(torch.nn.Module):
    def __init__(self, in_feats, n_hidden):
        """
        :param g[dgl]: graph
        :param in_feats[int]: dimension of input features
        :param n_hidden[int]: number of hidden units
        """
        super(SemanticAttention, self).__init__()
        self.project = torch.nn.Sequential(torch.nn.Linear(in_feats, n_hidden), torch.nn.Tanh(), torch.nn.Linear(n_hidden, 1, bias=False))
    
    def forward(self, z):
        w = self.project(z).mean(0) # (N, M, D*K) -> (N, M, 1) -> (M, 1)
        beta = torch.softmax(w, dim=0) # (M, 1)
        beta = beta.expand((z.shape[0],) + beta.shape) # (N, M, 1)
        return (beta * z).sum(1) # (N, M, 1)*(N, M, D*K) = (N, M, D*K) -> sum(1) (N, D*K)

In [54]:
class HANLayer(torch.nn.Module):
    def __init__(self, meta_paths, in_feats, n_hidden, n_heads, activation, feat_dropout=0.2, attn_dropout=0.2, negative_slope=0.2, residual=False):
        """
        :param meta_paths[list]: list of metapaths, each as a list of edge types
        :param in_feats[int]: dimension of input features
        :param n_hidden[int]: number of hidden units
        :param n_heads[int]: number of hidden attention heads, default=8
        :param activation[str]: callable activation function/layer or None, optional.
        :param feat_dropout[float]: dropout rate on feature, default=0
        :param attn_dropout[float]: dropout rate on attention weight, default=0
        :param negative_slope[float]: the negative slope of leaky relu, default=0.2 
        :param residual[bool]: use residual connection
        """
        super(HANLayer, self).__init__()
        # One GAT layer for each meta path based adjacency matrix
        self.gat_layers = torch.nn.ModuleList()
        for l in range(len(meta_paths)):
            self.gat_layers.append(GATConv(in_feats, n_hidden, n_heads, feat_dropout, attn_dropout, negative_slope, residual, activation=F.elu, allow_zero_in_degree=True))
        
        # Due to multi-head, in_feats=n_hidden*n_heads
        self.semantic_attention = SemanticAttention(n_hidden*n_heads, n_hidden)
        self.meta_paths = list(tuple(meta_path) for meta_path in meta_paths)
        
        self._cached_graph = None
        self._cached_coalesced_graph = {}
    
    def forward(self, g, features):
        semantic_embeddings = []
        if self._cached_graph is None or self._cached_graph is not g:
            self._cached_graph = g
            self._cached_coalesced_graph.clear()
            for meta_path in self.meta_paths:
                self._cached_coalesced_graph[meta_path] = dgl.metapath_reachable_graph(g, meta_path)
        
        for i, meta_path in enumerate(self.meta_paths):
            new_g = self._cached_coalesced_graph[meta_path] # get homN through metapath
            # N: number of nodes
            # d: dimension of hidden state
            # K: number of attention heads
            # M: number of metapaths
            semantic_embeddings.append(self.gat_layers[i](new_g, features).flatten(1)) #(N, d*K) 
        semantic_embeddings = torch.stack(semantic_embeddings, dim=1) # (N, M, d * K)
        return self.semantic_attention(semantic_embeddings) # (N, D * K)
  

In [55]:
class HAN(torch.nn.Module):
    def __init__(self, meta_paths, in_feats, n_hidden, n_classes, n_heads, n_out_heads, activation, feat_dropout=0.2, attn_dropout=0.2, negative_slope=0.2, residual=False):
        """
        :param meta_paths[list]: list of metapaths, each as a list of edge types
        :param in_feats[int]: dimension of input features
        :param n_hidden[int]: number of hidden units
        :param n_classes[int]: number of classes
        :param n_heads[int]: number of hidden attention heads, default=8
        :param n_out_heads[int]: number of output attention heads, default=1
        :param activation[str]: callable activation function/layer or None, optional.
        :param feat_dropout[float]: dropout rate on feature, default=0
        :param attn_dropout[float]: dropout rate on attention weight, default=0
        :param negative_slope[float]: the negative slope of leaky relu, default=0.2 
        :param residual[bool]: use residual connection
        """
        super(HAN, self).__init__()
        self.layers = torch.nn.ModuleList()
        self.activation = activation
        # Input layer
        self.layers.append(HANLayer(meta_paths, in_feats, n_hidden, n_heads[0], activation, feat_dropout, attn_dropout))
        
        # Hidden layer
        for l in range(1, len(n_heads)):
            self.layers.append(HANLayer(meta_paths, n_hidden*n_heads[l-1], n_hidden, n_heads[l], activation, feat_dropout, attn_dropout))
        
        # Output layer
        self.predict = torch.nn.Linear(n_hidden*n_heads[-1], n_classes)
    
    def forward(self, g, featuers):
        h = featuers
        for layer in self.layers:
            h = layer(g, h)
        
        # Output layer
        logits = self.predict(h)
        return logits
    
    def embedding(self, g, x, nodes=None):
        """
        Returns the embeddings of the input nodes
        Parameters
        ----------
        nodes: Tensor, optional
            Input nodes, if set `None`, will return all the node embedding.
        Returns
        -------
        Tensor
            Node embedding.
        """
        h = x
        for layer in self.layers:
            h = layer(g, h)
        save(h, 'results/HAN/emb.pkl')
        return h
    

In [56]:
model_HAN = HAN(
    meta_paths=[
        ['include', 'is_included'],
        ['tt']
    ],
    in_feats=graph_cb12.nodes['title'].data['feature'].shape[1],
    n_hidden=16,
    n_classes=dataset_cb12.num_classes,
    n_heads=[8],
    n_out_heads=1,
    activation=F.elu,
    feat_dropout=0.2,
    attn_dropout=0.6,
    negative_slope=0.2,
    residual=False
)
train(graph_cb12, model_HAN, 'HAN', lr=0.001, weight_decay=0.0005, epoch=2000)

Number of classes: 16




In epoch 0, loss: 2.7744
train acc: 0.0289, val acc: 0.0407 (best 0.0407)
train macro_f1: 0.0168, val macro_f1: 0.0218
train micro_f1: 0.0289, val micro_f1: 0.0407
train weighted_f1: 0.0236, val weighted_f1: 0.0416
-----------------------------




In epoch 100, loss: 1.1851
train acc: 0.6958, val acc: 0.7162 (best 0.7162)
train macro_f1: 0.5368, val macro_f1: 0.4682
train micro_f1: 0.6958, val micro_f1: 0.7162
train weighted_f1: 0.6697, val weighted_f1: 0.6921
-----------------------------




In epoch 200, loss: 0.6477
train acc: 0.8124, val acc: 0.7537 (best 0.7537)
train macro_f1: 0.7768, val macro_f1: 0.6470
train micro_f1: 0.8124, val micro_f1: 0.7537
train weighted_f1: 0.8103, val weighted_f1: 0.7501
-----------------------------




In epoch 300, loss: 0.4810
train acc: 0.8647, val acc: 0.7564 (best 0.7645)
train macro_f1: 0.8452, val macro_f1: 0.6838
train micro_f1: 0.8647, val micro_f1: 0.7564
train weighted_f1: 0.8641, val weighted_f1: 0.7548
-----------------------------




EarlyStopping counter: 1 out of 100




EarlyStopping counter: 2 out of 100




EarlyStopping counter: 3 out of 100




EarlyStopping counter: 4 out of 100




EarlyStopping counter: 5 out of 100




EarlyStopping counter: 6 out of 100




EarlyStopping counter: 7 out of 100




EarlyStopping counter: 8 out of 100




EarlyStopping counter: 9 out of 100




EarlyStopping counter: 10 out of 100




EarlyStopping counter: 11 out of 100




EarlyStopping counter: 12 out of 100




EarlyStopping counter: 13 out of 100




EarlyStopping counter: 14 out of 100




EarlyStopping counter: 15 out of 100




EarlyStopping counter: 16 out of 100




EarlyStopping counter: 17 out of 100




EarlyStopping counter: 18 out of 100




EarlyStopping counter: 19 out of 100




EarlyStopping counter: 20 out of 100




EarlyStopping counter: 21 out of 100




EarlyStopping counter: 22 out of 100




EarlyStopping counter: 23 out of 100




EarlyStopping counter: 24 out of 100




EarlyStopping counter: 25 out of 100




EarlyStopping counter: 26 out of 100




EarlyStopping counter: 27 out of 100




EarlyStopping counter: 28 out of 100




EarlyStopping counter: 29 out of 100




EarlyStopping counter: 30 out of 100




EarlyStopping counter: 31 out of 100




EarlyStopping counter: 32 out of 100




EarlyStopping counter: 33 out of 100




EarlyStopping counter: 34 out of 100




EarlyStopping counter: 35 out of 100




EarlyStopping counter: 36 out of 100




EarlyStopping counter: 37 out of 100




EarlyStopping counter: 38 out of 100




EarlyStopping counter: 39 out of 100




EarlyStopping counter: 40 out of 100




EarlyStopping counter: 41 out of 100




EarlyStopping counter: 42 out of 100




EarlyStopping counter: 43 out of 100




EarlyStopping counter: 44 out of 100




EarlyStopping counter: 45 out of 100




EarlyStopping counter: 46 out of 100




EarlyStopping counter: 47 out of 100




EarlyStopping counter: 48 out of 100




EarlyStopping counter: 49 out of 100




EarlyStopping counter: 50 out of 100




EarlyStopping counter: 51 out of 100




EarlyStopping counter: 52 out of 100




EarlyStopping counter: 53 out of 100




EarlyStopping counter: 54 out of 100




EarlyStopping counter: 55 out of 100




EarlyStopping counter: 56 out of 100




EarlyStopping counter: 57 out of 100




EarlyStopping counter: 58 out of 100




EarlyStopping counter: 59 out of 100




EarlyStopping counter: 60 out of 100




EarlyStopping counter: 61 out of 100




EarlyStopping counter: 62 out of 100




EarlyStopping counter: 63 out of 100




EarlyStopping counter: 64 out of 100




EarlyStopping counter: 65 out of 100




EarlyStopping counter: 66 out of 100




EarlyStopping counter: 67 out of 100




EarlyStopping counter: 68 out of 100




EarlyStopping counter: 69 out of 100




EarlyStopping counter: 70 out of 100




EarlyStopping counter: 71 out of 100




EarlyStopping counter: 72 out of 100




EarlyStopping counter: 73 out of 100




EarlyStopping counter: 74 out of 100




EarlyStopping counter: 75 out of 100




EarlyStopping counter: 76 out of 100




EarlyStopping counter: 77 out of 100




EarlyStopping counter: 78 out of 100




EarlyStopping counter: 79 out of 100




EarlyStopping counter: 80 out of 100




EarlyStopping counter: 81 out of 100




EarlyStopping counter: 82 out of 100




EarlyStopping counter: 83 out of 100




EarlyStopping counter: 84 out of 100




EarlyStopping counter: 85 out of 100




EarlyStopping counter: 86 out of 100




EarlyStopping counter: 87 out of 100
In epoch 400, loss: 0.3813
train acc: 0.9034, val acc: 0.7629 (best 0.7645)
train macro_f1: 0.8901, val macro_f1: 0.7050
train micro_f1: 0.9034, val micro_f1: 0.7629
train weighted_f1: 0.9032, val weighted_f1: 0.7614
-----------------------------




EarlyStopping counter: 88 out of 100




EarlyStopping counter: 89 out of 100




EarlyStopping counter: 90 out of 100




EarlyStopping counter: 91 out of 100




EarlyStopping counter: 92 out of 100




EarlyStopping counter: 93 out of 100




EarlyStopping counter: 94 out of 100




EarlyStopping counter: 95 out of 100




EarlyStopping counter: 96 out of 100




EarlyStopping counter: 97 out of 100




EarlyStopping counter: 98 out of 100




EarlyStopping counter: 99 out of 100




EarlyStopping counter: 100 out of 100


TypeError: evaluate() missing 1 required positional argument: 'loss_fn'