In [1]:
!pip install transformers seqeval



In [2]:


import json
import pandas as pd
import numpy as np
import re
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizer, DistilBertForTokenClassification
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup
from seqeval.metrics import classification_report, f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
from collections import Counter
warnings.filterwarnings('ignore')

In [3]:
# Set random seeds for reproducibility
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

In [4]:
print("Loading data from resume_data.json...")

try:
    with open('/content/resume_data.json', 'r', encoding='utf-8') as f:
        file_content = f.read()

    lines = file_content.split('\n')

    data = []
    for line in lines:
        if line.strip():  # Skip empty lines
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError as e:
                print(f"Error parsing JSON: {str(e)} - {line[:50]}...")

    print(f"Successfully loaded {len(data)} examples")

except Exception as e:
    print(f"Error loading file: {str(e)}")
    data = []

Loading data from resume_data.json...
Successfully loaded 430 examples


In [5]:

# Data exploration
if data:
    example = data[0]
    print("\nExample content (truncated to 200 chars):")
    print(example['content'][:200])
    print("\nExample annotations:")
    for annotation in example['annotation']:
        print(f"Label: {annotation['label']}")
        for point in annotation['points']:
            print(f"  - Text: {point['text'][:50]}...")
else:
    print("No data available. Please check file path and format.")


Example content (truncated to 200 chars):
Abhishek Jha
Application Development Associate - Accenture

Bengaluru, Karnataka - Email me on Indeed: indeed.com/r/Abhishek-Jha/10e7a8cb732bc43a

• To work for an organization which provides me the o

Example annotations:
Label: ['SKILLS']
  - Text: 
• Programming language: C, C++, Java
• Oracle Peo...
Label: ['SKILLS']
  - Text: C (Less than 1 year), Database (Less than 1 year),...
Label: ['COLLEGE NAME']
  - Text: Kendriya Vidyalaya...
Label: ['COLLEGE NAME']
  - Text: Woodbine modern school...
Label: ['COLLEGE NAME']
  - Text: B.v.b college of engineering and technology...
Label: ['DESIGNATION']
  - Text: B.E in Information science and engineering
...
Label: ['COMPANY']
  - Text: Accenture...
Label: ['DESIGNATION']
  - Text: Application Development Associate...
Label: ['EMAIL']
  - Text: Indeed: indeed.com/r/Abhishek-Jha/10e7a8cb732bc43a...
Label: ['LOCATION']
  - Text: Bengaluru...
Label: ['COMPANY']
  - Text: Accenture...
Label: ['DESIGN

In [6]:

# Define unique labels
all_labels = set()
for example in data:
    for annotation in example['annotation']:
        all_labels.update(annotation['label'])

print("\nUnique labels found in the dataset:")
unique_labels = sorted(list(all_labels))
print(unique_labels)


Unique labels found in the dataset:
['COLLEGE NAME', 'COMPANY', 'DEGREE', 'DESIGNATION', 'EMAIL', 'LOCATION', 'NAME', 'SKILLS']


In [7]:

# Add 'O' for non-entity tokens and special tokens
labels = ['O'] + [f'B-{label}' for label in unique_labels] + [f'I-{label}' for label in unique_labels]
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for i, label in enumerate(labels)}

print(f"\nTotal number of labels: {len(labels)}")
print(f"Labels: {labels}")


Total number of labels: 17
Labels: ['O', 'B-COLLEGE NAME', 'B-COMPANY', 'B-DEGREE', 'B-DESIGNATION', 'B-EMAIL', 'B-LOCATION', 'B-NAME', 'B-SKILLS', 'I-COLLEGE NAME', 'I-COMPANY', 'I-DEGREE', 'I-DESIGNATION', 'I-EMAIL', 'I-LOCATION', 'I-NAME', 'I-SKILLS']


In [8]:


# Improved BIO format conversion
def convert_data_to_bio_format(data):
    """Convert all examples to BIO format with improved entity boundary detection"""
    processed_data = []

    for example in data:
        try:
            content = example['content']
            annotations = example['annotation']

            # Initialize all characters with 'O' tag
            char_labels = ['O'] * len(content)

            # Create a more structured representation of annotations to handle overlaps
            entity_spans = []
            for annotation in annotations:
                for label in annotation['label']:
                    for point in annotation['points']:
                        if 'start' in point and 'end' in point:
                            start, end = point['start'], point['end']
                            if start < len(content) and end <= len(content):
                                entity_spans.append((start, end, label))

            # Sort by start position and then by length (longer spans first for overlaps)
            entity_spans.sort(key=lambda x: (x[0], x[1]-x[0]), reverse=True)

            # Apply labels to character sequence
            for start, end, label in entity_spans:
                char_labels[start] = f'B-{label}'
                for i in range(start + 1, end):
                    char_labels[i] = f'I-{label}'

            # Tokenize content into words more carefully
            # This regex splits by whitespace but keeps punctuation separate
            tokens = []
            spans = []

            # Using regex to tokenize while preserving token positions
            for match in re.finditer(r'\S+', content):
                token = match.group()
                start, end = match.span()
                tokens.append(token)
                spans.append((start, end))

            # Assign labels to tokens based on character labels
            token_labels = []
            for token, (start, end) in zip(tokens, spans):
                # Get the labels for this span
                span_labels = char_labels[start:end]

                # Count the labels in this span
                label_counts = Counter(span_labels)

                # Prioritize B- labels, then I- labels, then O
                b_labels = [l for l in span_labels if l.startswith('B-')]
                i_labels = [l for l in span_labels if l.startswith('I-')]

                if b_labels:
                    # Use the most frequent B- label
                    most_common_b = Counter(b_labels).most_common(1)[0][0]
                    token_labels.append(most_common_b)
                elif i_labels:
                    # Use the most common I- label
                    most_common_i = Counter(i_labels).most_common(1)[0][0]
                    token_labels.append(most_common_i)
                else:
                    # No entity
                    token_labels.append('O')

            processed_data.append({
                'tokens': tokens,
                'labels': token_labels,
                'original_text': content
            })

        except Exception as e:
            print(f"Error processing example: {str(e)}")
            continue

    return processed_data

# Process all data
processed_data = convert_data_to_bio_format(data)
print(f"\nProcessed {len(processed_data)} examples to BIO format")


Processed 430 examples to BIO format


In [9]:

# Show an example of processed data
if processed_data:
    example = processed_data[0]
    print("\nExample processed data (first 10 tokens):")
    for token, label in zip(example['tokens'][:10], example['labels'][:10]):
        print(f"{token} -> {label}")

    # Analyze label distribution
    all_processed_labels = []
    for example in processed_data:
        all_processed_labels.extend(example['labels'])

    label_counts = Counter(all_processed_labels)
    print("\nLabel distribution in processed data:")
    for label, count in label_counts.most_common():
        print(f"{label}: {count}")
else:
    print("No processed data available.")


Example processed data (first 10 tokens):
Abhishek -> B-NAME
Jha -> I-NAME
Application -> B-DESIGNATION
Development -> I-DESIGNATION
Associate -> I-DESIGNATION
- -> O
Accenture -> B-COMPANY
Bengaluru, -> B-LOCATION
Karnataka -> O
- -> O

Label distribution in processed data:
O: 184035
I-SKILLS: 13883
I-COLLEGE NAME: 1698
I-DESIGNATION: 1636
I-DEGREE: 1584
B-COMPANY: 1186
B-DESIGNATION: 928
I-COMPANY: 875
B-SKILLS: 788
B-LOCATION: 783
B-COLLEGE NAME: 612
B-DEGREE: 571
B-EMAIL: 443
B-NAME: 438
I-NAME: 429
I-EMAIL: 186
I-LOCATION: 50


In [10]:

# Add entity presence information to each example
for example in processed_data:
    # Create a string representation of which entities are present in an example
    present_entities = set()
    for label in example['labels']:
        if label != 'O':
            entity_type = label.split('-')[1]
            present_entities.add(entity_type)
    example['entity_presence'] = '+'.join(sorted(present_entities)) if present_entities else 'none'

In [11]:

# Count entity presence distribution
entity_presence_counts = Counter(ex['entity_presence'] for ex in processed_data)
print("\nEntity distribution before split:")
for entity_type, count in entity_presence_counts.most_common():
    print(f"{entity_type}: {count}")

# Filter out classes with fewer than 2 samples for stratified splitting
valid_classes = {cls for cls, count in entity_presence_counts.items() if count >= 2}
filtered_data = [ex for ex in processed_data if ex['entity_presence'] in valid_classes]

if len(filtered_data) < len(processed_data):
    print(f"⚠ Warning: {len(processed_data) - len(filtered_data)} examples removed due to insufficient class instances for stratification.")



Entity distribution before split:
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 193
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME: 42
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+LOCATION+NAME+SKILLS: 30
COLLEGE NAME+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 17
COLLEGE NAME+COMPANY+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 14
COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 14
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+NAME+SKILLS: 13
COLLEGE NAME+COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 11
COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 10
COLLEGE NAME+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 7
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+NAME+SKILLS: 7
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+LOCATION+NAME: 6
DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 6
COLLEGE NAME+DEGREE+DESIGNATION+LOCATION+NAME: 5
COMPANY+DEGREE+DESIGNATION+LOCATION+NAME+SKILLS: 4
COMPANY+DESIGNATION+EMAIL+LOCATION+NAME: 4
COLLEGE NAME+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 4
COLLE

In [12]:

# FIRST SPLIT THE DATA
try:
    train_data, temp_data = train_test_split(
        filtered_data,
        test_size=0.3,
        random_state=42,
        shuffle=True,
        stratify=[ex['entity_presence'] for ex in filtered_data]
    )

    val_data, test_data = train_test_split(
        temp_data,
        test_size=0.5,
        random_state=42,
        shuffle=True,
        stratify=[ex['entity_presence'] for ex in temp_data]
    )
except ValueError as e:
    print(f"⚠ Stratified split failed: {e}")
    print("❗ Falling back to random splitting without stratification.")

    train_data, temp_data = train_test_split(
        filtered_data, test_size=0.3, random_state=42, shuffle=True
    )
    val_data, test_data = train_test_split(
        temp_data, test_size=0.5, random_state=42, shuffle=True
    )

print(f"\nInitial data split: Train {len(train_data)} (70%), Validation {len(val_data)} (15%), Test {len(test_data)} (15%) examples")

⚠ Stratified split failed: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
❗ Falling back to random splitting without stratification.

Initial data split: Train 298 (70%), Validation 64 (15%), Test 65 (15%) examples


In [13]:
# Data Augmentation Functions (same as before)
import random
def augment_token_replacement(tokens, labels, replacement_prob=0.15):
    """Replace random non-entity tokens with similar length tokens"""
    augmented_tokens = tokens.copy()

    # Create a pool of non-entity tokens from the TRAINING dataset only
    non_entity_tokens = {}
    for length in range(1, 15):  # Group tokens by length
        non_entity_tokens[length] = []

    # Group non-entity tokens by length from training data only
    for example in train_data:  # Use train_data instead of processed_data
        for token, label in zip(example['tokens'], example['labels']):
            if label == 'O':
                length = min(len(token), 14)  # Cap at 14 to avoid sparse groups
                non_entity_tokens[length].append(token)

    # Replace non-entity tokens with probability replacement_prob
    for i, (token, label) in enumerate(zip(tokens, labels)):
        if label == 'O' and random.random() < replacement_prob:
            token_length = min(len(token), 14)
            if non_entity_tokens[token_length]:  # If we have replacements of this length
                replacement = random.choice(non_entity_tokens[token_length])
                augmented_tokens[i] = replacement

    return augmented_tokens, labels

def augment_token_deletion(tokens, labels, deletion_prob=0.05):
    """Delete random non-entity tokens"""
    augmented_tokens = []
    augmented_labels = []

    for token, label in zip(tokens, labels):
        # Only delete O-labeled tokens with a certain probability
        if label == 'O' and random.random() < deletion_prob:
            continue
        augmented_tokens.append(token)
        augmented_labels.append(label)

    return augmented_tokens, augmented_labels

def augment_token_swap(tokens, labels, swap_prob=0.05):
    """Swap adjacent non-entity tokens"""
    augmented_tokens = tokens.copy()

    # Find eligible positions for swapping (adjacent O tokens)
    eligible_positions = []
    for i in range(len(labels) - 1):
        if labels[i] == 'O' and labels[i + 1] == 'O':
            eligible_positions.append(i)

    # Perform swaps
    num_swaps = int(len(eligible_positions) * swap_prob)
    if eligible_positions and num_swaps > 0:
        positions_to_swap = random.sample(eligible_positions, min(num_swaps, len(eligible_positions)))
        for pos in positions_to_swap:
            augmented_tokens[pos], augmented_tokens[pos + 1] = augmented_tokens[pos + 1], augmented_tokens[pos]

    return augmented_tokens, labels

def augment_data(data, augmentation_factor=2):
    """Apply data augmentation to increase dataset size"""
    augmented_data = []

    # First add all original examples
    augmented_data.extend(data)

    print(f"\nApplying data augmentation with factor {augmentation_factor}...")
    # Generate augmented examples
    for _ in range(augmentation_factor - 1):
        for example in data:
            tokens = example['tokens']
            labels = example['labels']

            # Choose random augmentation technique
            augmentation_choice = random.choice(['replace', 'delete', 'swap', 'combined'])

            if augmentation_choice == 'replace':
                aug_tokens, aug_labels = augment_token_replacement(tokens, labels)
            elif augmentation_choice == 'delete':
                aug_tokens, aug_labels = augment_token_deletion(tokens, labels)
            elif augmentation_choice == 'swap':
                aug_tokens, aug_labels = augment_token_swap(tokens, labels)
            else:  # combined
                # Apply multiple augmentations sequentially
                aug_tokens, aug_labels = tokens.copy(), labels.copy()

                if random.random() < 0.5:
                    aug_tokens, aug_labels = augment_token_replacement(aug_tokens, aug_labels, 0.1)
                if random.random() < 0.5:
                    aug_tokens, aug_labels = augment_token_deletion(aug_tokens, aug_labels, 0.03)
                if random.random() < 0.5:
                    aug_tokens, aug_labels = augment_token_swap(aug_tokens, aug_labels, 0.03)

            # Create augmented example
            augmented_data.append({
                'tokens': aug_tokens,
                'labels': aug_labels,
                'original_text': example['original_text'],
                'entity_presence': example['entity_presence'],
                'augmented': True
            })

    return augmented_data

# APPLY AUGMENTATION ONLY TO TRAINING DATA
AUGMENTATION_FACTOR = 3  # Original data + 2x augmented data
train_data_augmented = augment_data(train_data, AUGMENTATION_FACTOR)
print(f"Training data augmentation complete. Training set size increased from {len(train_data)} to {len(train_data_augmented)}")



Applying data augmentation with factor 3...
Training data augmentation complete. Training set size increased from 298 to 894


In [14]:

# Count entity presence distribution after augmentation for training data
train_entity_presence_counts = Counter(ex['entity_presence'] for ex in train_data_augmented)
print("\nTraining data entity distribution after augmentation:")
for entity_type, count in train_entity_presence_counts.most_common():
    print(f"{entity_type}: {count}")


Training data entity distribution after augmentation:
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 444
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME: 87
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+LOCATION+NAME+SKILLS: 57
COLLEGE NAME+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 33
COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 27
COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 24
COLLEGE NAME+COMPANY+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 24
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+NAME+SKILLS: 21
COLLEGE NAME+COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 15
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+NAME+SKILLS: 15
COLLEGE NAME+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 12
COLLEGE NAME+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 12
COMPANY+DESIGNATION+EMAIL+LOCATION+NAME: 12
COLLEGE NAME+DEGREE+DESIGNATION+LOCATION+NAME: 9
DEGREE+EMAIL+LOCATION+NAME+SKILLS: 9
DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 9
COLLEGE NAME+DESIGNATION+EMAIL+NAME: 6
COLLEGE NAME+

In [15]:

# Check class distribution in splits
def print_entity_distribution(dataset, name):
    entity_presence = Counter([ex['entity_presence'] for ex in dataset])
    print(f"\n{name} set entity distribution:")
    for entity_type, count in entity_presence.most_common():
        print(f"{entity_type}: {count}")

print_entity_distribution(train_data_augmented, "Training (Augmented)")
print_entity_distribution(val_data, "Validation")
print_entity_distribution(test_data, "Test")


Training (Augmented) set entity distribution:
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 444
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME: 87
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+LOCATION+NAME+SKILLS: 57
COLLEGE NAME+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 33
COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 27
COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 24
COLLEGE NAME+COMPANY+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 24
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+NAME+SKILLS: 21
COLLEGE NAME+COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 15
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+NAME+SKILLS: 15
COLLEGE NAME+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 12
COLLEGE NAME+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 12
COMPANY+DESIGNATION+EMAIL+LOCATION+NAME: 12
COLLEGE NAME+DEGREE+DESIGNATION+LOCATION+NAME: 9
DEGREE+EMAIL+LOCATION+NAME+SKILLS: 9
DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 9
COLLEGE NAME+DESIGNATION+EMAIL+NAME: 6
COLLEGE NAME+COMPANY+

In [16]:
# Fixed and improved ResumeNERDataset class for BERT
class ResumeNERDataset(Dataset):
    def __init__(self, texts, tags, tokenizer, max_len, label2id):
        self.texts = texts
        self.tags = tags
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.label2id = label2id

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item_idx):
        text = self.texts[item_idx]
        tags = self.tags[item_idx]

        # Ensure text and tags have the same length
        if len(text) != len(tags):
            min_len = min(len(text), len(tags))
            text = text[:min_len]
            tags = tags[:min_len]

        # Tokenize with BERT tokenizer
        encoding = self.tokenizer(
            text,
            is_split_into_words=True,
            return_offsets_mapping=True,
            padding='max_length',
            truncation=True,
            max_length=self.max_len,
            return_tensors='pt'
        )

        # Create label tensor with proper alignment to wordpieces
        word_ids = encoding.word_ids()
        previous_word_idx = None
        label_ids = []

        for word_idx in word_ids:
            if word_idx is None:
                # Special tokens like [CLS] and [SEP]
                label_ids.append(-100)
            elif word_idx != previous_word_idx:
                # First token of a word
                if word_idx < len(tags):
                    label_ids.append(self.label2id.get(tags[word_idx], self.label2id['O']))
                else:
                    label_ids.append(-100)
            else:
                # Continuation of a word (WordPiece)
                if tags[previous_word_idx].startswith('B-'):
                    # Convert B- to I- for continuation tokens
                    i_tag = f"I-{tags[previous_word_idx][2:]}"
                    label_ids.append(self.label2id.get(i_tag, self.label2id['O']))
                else:
                    # Keep the same tag for other cases
                    label_ids.append(self.label2id.get(tags[previous_word_idx], self.label2id['O']))

            previous_word_idx = word_idx

        # Create final item with all tensors
        item = {key: val.squeeze() for key, val in encoding.items() if key != 'offset_mapping'}
        item['labels'] = torch.tensor(label_ids)

        return item


In [17]:

from transformers import BertTokenizerFast, BertForTokenClassification
# Prepare datasets with adequate max length using BERT
MAX_LEN = 256  # Increased from 128 to capture more context
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# Use augmented training data, but original validation and test data
train_texts = [example['tokens'] for example in train_data_augmented]
train_tags = [example['labels'] for example in train_data_augmented]
val_texts = [example['tokens'] for example in val_data]
val_tags = [example['labels'] for example in val_data]
test_texts = [example['tokens'] for example in test_data]
test_tags = [example['labels'] for example in test_data]




train_dataset = ResumeNERDataset(train_texts, train_tags, tokenizer, MAX_LEN, label2id)
val_dataset = ResumeNERDataset(val_texts, val_tags, tokenizer, MAX_LEN, label2id)
test_dataset = ResumeNERDataset(test_texts, test_tags, tokenizer, MAX_LEN, label2id)

In [18]:

# Create data loaders with appropriate batch size for your data
BATCH_SIZE = 8  # Reduced to allow more gradient updates and better learning
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [19]:

# Initialize BERT model
model = BertForTokenClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id
)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
# Setup device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"\nUsing device: {device}")
model.to(device)


Using device: cuda


BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12

In [21]:
# Improved training parameters
LEARNING_RATE = 1e-5  # Slightly higher learning rate
EPOCHS = 100  # More epochs for better learning
WEIGHT_DECAY = 0.01  # Add weight decay to reduce overfitting

In [22]:
# Configure optimizer with weight decay
optimizer = AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY
)

In [23]:
# Setup scheduler with warmup
total_steps = len(train_dataloader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.1 * total_steps),  # 10% warmup
    num_training_steps=total_steps
)

In [24]:
# Enhanced training function with gradient accumulation
def train(accumulation_steps=4):
    model.train()
    total_loss = 0

    progress_bar = tqdm(train_dataloader, desc="Training")
    optimizer.zero_grad()

    for idx, batch in enumerate(progress_bar):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )

        loss = outputs.loss
        loss = loss / accumulation_steps  # Normalize loss for accumulation
        loss.backward()

        total_loss += loss.item() * accumulation_steps  # Denormalize for reporting

        # Accumulated gradient update
        if (idx + 1) % accumulation_steps == 0 or (idx + 1) == len(train_dataloader):
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        progress_bar.set_postfix({'loss': f"{loss.item() * accumulation_steps:.4f}"})

    return total_loss / len(train_dataloader)

In [25]:
# Change 1: Add accuracy metric to the evaluate function
def evaluate(dataloader):
    model.eval()
    total_loss = 0
    predictions = []
    true_labels = []

    # Variables to track token-level accuracy
    correct_tokens = 0
    total_tokens = 0

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )

            loss = outputs.loss
            total_loss += loss.item()

            # Get predictions and true labels
            logits = outputs.logits
            preds = torch.argmax(logits, dim=2)

            # Remove ignored index (-100)
            for i in range(input_ids.shape[0]):
                pred_list = []
                true_list = []
                for j in range(input_ids.shape[1]):
                    if labels[i, j] != -100:
                        pred_list.append(id2label[preds[i, j].item()])
                        true_list.append(id2label[labels[i, j].item()])

                        # Count correct token predictions for accuracy
                        if preds[i, j].item() == labels[i, j].item():
                            correct_tokens += 1
                        total_tokens += 1

                predictions.append(pred_list)
                true_labels.append(true_list)

    # Calculate token-level accuracy
    token_accuracy = correct_tokens / total_tokens if total_tokens > 0 else 0

    # Calculate metrics using seqeval
    report = classification_report(true_labels, predictions, output_dict=True)
    precision = precision_score(true_labels, predictions)
    recall = recall_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions)

    # Print token-level accuracy
    print(f"\nToken-level accuracy: {token_accuracy:.4f}")

    # Print entity-level metrics
    print("\nEntity-level metrics:")
    for entity_type, metrics in report.items():
        if entity_type not in ['micro avg', 'macro avg', 'weighted avg', 'O']:
            print(f"{entity_type}:")
            print(f"  Precision: {metrics['precision']:.4f}")
            print(f"  Recall: {metrics['recall']:.4f}")
            print(f"  F1: {metrics['f1-score']:.4f}")
            print(f"  Support: {metrics['support']}")

    return total_loss / len(dataloader), report, precision, recall, f1, token_accuracy

In [26]:
# Change 2: Update the training loop to track accuracy
train_losses = []
val_losses = []
val_accuracies = []  # New list to track validation accuracies
best_val_f1 = 0
best_model_path = "best_resume_ner_model.pt"
patience = 7  # Early stopping patience
no_improvement = 0

In [27]:
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch + 1}/{EPOCHS}")

    # Train
    train_loss = train()
    train_losses.append(train_loss)

    # Evaluate on validation set
    val_loss, val_report, val_precision, val_recall, val_f1, val_accuracy = evaluate(val_dataloader)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)  # Track validation accuracy

    print(f"Train Loss: {train_loss:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}, Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, F1: {val_f1:.4f}")

    # Save best model and check for early stopping
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), best_model_path)
        print(f"New best model saved with F1: {val_f1:.4f}")
        no_improvement = 0
    else:
        no_improvement += 1
        print(f"No improvement for {no_improvement} epochs")

        if no_improvement >= patience:
            print("Early stopping triggered")
            break



Epoch 1/100


Training: 100%|██████████| 112/112 [00:44<00:00,  2.51it/s, loss=2.9868]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.81it/s]



Token-level accuracy: 0.0246

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 30
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 105
DEGREE:
  Precision: 0.0002
  Recall: 0.0345
  F1: 0.0005
  Support: 29
DESIGNATION:
  Precision: 0.0028
  Recall: 0.0500
  F1: 0.0052
  Support: 100
EMAIL:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 54
LOCATION:
  Precision: 0.0009
  Recall: 0.0330
  F1: 0.0018
  Support: 91
NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 2.9905
Val Loss: 2.9298, Accuracy: 0.0246, Precision: 0.0006, Recall: 0.0176, F1: 0.0012
New best model saved with F1: 0.0012

Epoch 2/100


Training: 100%|██████████| 112/112 [00:45<00:00,  2.48it/s, loss=2.7778]
Evaluating: 100%|██████████| 8/8 [00:05<00:00,  1.39it/s]



Token-level accuracy: 0.0751

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 30
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 105
DEGREE:
  Precision: 0.0003
  Recall: 0.0345
  F1: 0.0005
  Support: 29
DESIGNATION:
  Precision: 0.0036
  Recall: 0.0600
  F1: 0.0068
  Support: 100
EMAIL:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 54
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 91
NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 2.8861
Val Loss: 2.7708, Accuracy: 0.0751, Precision: 0.0005, Recall: 0.0137, F1: 0.0010
No improvement for 1 epochs

Epoch 3/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.64it/s, loss=2.4809]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.10it/s]



Token-level accuracy: 0.3974

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 30
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 105
DEGREE:
  Precision: 0.0004
  Recall: 0.0345
  F1: 0.0008
  Support: 29
DESIGNATION:
  Precision: 0.0026
  Recall: 0.0200
  F1: 0.0046
  Support: 100
EMAIL:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 54
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 91
NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 2.6644
Val Loss: 2.4763, Accuracy: 0.3974, Precision: 0.0004, Recall: 0.0059, F1: 0.0008
No improvement for 2 epochs

Epoch 4/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.65it/s, loss=1.9484]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.10it/s]



Token-level accuracy: 0.7732

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 30
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 105
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 29
DESIGNATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 100
EMAIL:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 54
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 91
NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 2.2433
Val Loss: 1.8897, Accuracy: 0.7732, Precision: 0.0000, Recall: 0.0000, F1: 0.0000
No improvement for 3 epochs

Epoch 5/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.65it/s, loss=1.0895]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.73it/s]



Token-level accuracy: 0.7811

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 30
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 105
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 29
DESIGNATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 100
EMAIL:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 54
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 91
NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 1.6133
Val Loss: 1.2231, Accuracy: 0.7811, Precision: 0.0000, Recall: 0.0000, F1: 0.0000
No improvement for 4 epochs

Epoch 6/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.66it/s, loss=1.2604]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.04it/s]



Token-level accuracy: 0.7816

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 30
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 105
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 29
DESIGNATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 100
EMAIL:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 54
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 91
NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 1.1925
Val Loss: 0.9692, Accuracy: 0.7816, Precision: 0.0000, Recall: 0.0000, F1: 0.0000
No improvement for 5 epochs

Epoch 7/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.64it/s, loss=0.7654]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.04it/s]



Token-level accuracy: 0.8174

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 30
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 105
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 29
DESIGNATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 100
EMAIL:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 54
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 91
NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 0.9965
Val Loss: 0.8152, Accuracy: 0.8174, Precision: 0.0000, Recall: 0.0000, F1: 0.0000
No improvement for 6 epochs

Epoch 8/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.63it/s, loss=0.6122]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.11it/s]



Token-level accuracy: 0.8301

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 30
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 105
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 29
DESIGNATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 100
EMAIL:
  Precision: 0.2568
  Recall: 0.7037
  F1: 0.3762
  Support: 54
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 91
NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 0.8301
Val Loss: 0.7098, Accuracy: 0.8301, Precision: 0.2533, Recall: 0.0745, F1: 0.1152
New best model saved with F1: 0.1152

Epoch 9/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.62it/s, loss=0.5260]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.12it/s]



Token-level accuracy: 0.8320

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 30
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 105
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 29
DESIGNATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 100
EMAIL:
  Precision: 0.3475
  Recall: 0.7593
  F1: 0.4767
  Support: 54
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 91
NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 0.7105
Val Loss: 0.6681, Accuracy: 0.8320, Precision: 0.1872, Recall: 0.0804, F1: 0.1125
No improvement for 1 epochs

Epoch 10/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.66it/s, loss=0.5201]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.12it/s]



Token-level accuracy: 0.8326

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 30
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 105
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 29
DESIGNATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 100
EMAIL:
  Precision: 0.3826
  Recall: 0.8148
  F1: 0.5207
  Support: 54
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 91
NAME:
  Precision: 0.0556
  Recall: 0.0625
  F1: 0.0588
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 0.6289
Val Loss: 0.6204, Accuracy: 0.8326, Precision: 0.1616, Recall: 0.0941, F1: 0.1190
New best model saved with F1: 0.1190

Epoch 11/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.61it/s, loss=0.3150]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.09it/s]



Token-level accuracy: 0.8471

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 30
COMPANY:
  Precision: 0.0909
  Recall: 0.0095
  F1: 0.0172
  Support: 105
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 29
DESIGNATION:
  Precision: 0.1282
  Recall: 0.0500
  F1: 0.0719
  Support: 100
EMAIL:
  Precision: 0.4455
  Recall: 0.8333
  F1: 0.5806
  Support: 54
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 91
NAME:
  Precision: 0.3717
  Recall: 0.6562
  F1: 0.4746
  Support: 64
SKILLS:
  Precision: 0.0118
  Recall: 0.0270
  F1: 0.0164
  Support: 37
Train Loss: 0.5553
Val Loss: 0.5629, Accuracy: 0.8471, Precision: 0.2467, Recall: 0.1843, F1: 0.2110
New best model saved with F1: 0.2110

Epoch 12/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.64it/s, loss=0.5369]
Evaluating: 100%|██████████| 8/8 [00:03<00:00,  2.60it/s]



Token-level accuracy: 0.8582

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 30
COMPANY:
  Precision: 0.2083
  Recall: 0.0476
  F1: 0.0775
  Support: 105
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 29
DESIGNATION:
  Precision: 0.1932
  Recall: 0.1700
  F1: 0.1809
  Support: 100
EMAIL:
  Precision: 0.4608
  Recall: 0.8704
  F1: 0.6026
  Support: 54
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 91
NAME:
  Precision: 0.3934
  Recall: 0.7500
  F1: 0.5161
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 0.4998
Val Loss: 0.5161, Accuracy: 0.8582, Precision: 0.2522, Recall: 0.2294, F1: 0.2402
New best model saved with F1: 0.2402

Epoch 13/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.65it/s, loss=0.3742]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.99it/s]



Token-level accuracy: 0.8687

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0200
  Recall: 0.0333
  F1: 0.0250
  Support: 30
COMPANY:
  Precision: 0.2400
  Recall: 0.0571
  F1: 0.0923
  Support: 105
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 29
DESIGNATION:
  Precision: 0.1760
  Recall: 0.2200
  F1: 0.1956
  Support: 100
EMAIL:
  Precision: 0.5158
  Recall: 0.9074
  F1: 0.6577
  Support: 54
LOCATION:
  Precision: 0.0800
  Recall: 0.0659
  F1: 0.0723
  Support: 91
NAME:
  Precision: 0.4414
  Recall: 0.7656
  F1: 0.5600
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 0.4445
Val Loss: 0.4771, Accuracy: 0.8687, Precision: 0.2405, Recall: 0.2608, F1: 0.2502
New best model saved with F1: 0.2502

Epoch 14/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.61it/s, loss=0.3732]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.08it/s]



Token-level accuracy: 0.8741

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0189
  Recall: 0.0333
  F1: 0.0241
  Support: 30
COMPANY:
  Precision: 0.1935
  Recall: 0.0571
  F1: 0.0882
  Support: 105
DEGREE:
  Precision: 0.1200
  Recall: 0.1034
  F1: 0.1111
  Support: 29
DESIGNATION:
  Precision: 0.1894
  Recall: 0.2500
  F1: 0.2155
  Support: 100
EMAIL:
  Precision: 0.5333
  Recall: 0.8889
  F1: 0.6667
  Support: 54
LOCATION:
  Precision: 0.2065
  Recall: 0.2088
  F1: 0.2077
  Support: 91
NAME:
  Precision: 0.5104
  Recall: 0.7656
  F1: 0.6125
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 0.3978
Val Loss: 0.4506, Accuracy: 0.8741, Precision: 0.2711, Recall: 0.2961, F1: 0.2830
New best model saved with F1: 0.2830

Epoch 15/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.66it/s, loss=0.2335]
Evaluating: 100%|██████████| 8/8 [00:03<00:00,  2.64it/s]



Token-level accuracy: 0.8833

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0625
  Recall: 0.1000
  F1: 0.0769
  Support: 30
COMPANY:
  Precision: 0.2128
  Recall: 0.0952
  F1: 0.1316
  Support: 105
DEGREE:
  Precision: 0.4091
  Recall: 0.3103
  F1: 0.3529
  Support: 29
DESIGNATION:
  Precision: 0.2061
  Recall: 0.2700
  F1: 0.2338
  Support: 100
EMAIL:
  Precision: 0.4381
  Recall: 0.8519
  F1: 0.5786
  Support: 54
LOCATION:
  Precision: 0.3654
  Recall: 0.4176
  F1: 0.3897
  Support: 91
NAME:
  Precision: 0.6588
  Recall: 0.8750
  F1: 0.7517
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 37
Train Loss: 0.3541
Val Loss: 0.4082, Accuracy: 0.8833, Precision: 0.3270, Recall: 0.3706, F1: 0.3474
New best model saved with F1: 0.3474

Epoch 16/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.62it/s, loss=0.4959]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.01it/s]



Token-level accuracy: 0.8867

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.1591
  Recall: 0.2333
  F1: 0.1892
  Support: 30
COMPANY:
  Precision: 0.2188
  Recall: 0.1333
  F1: 0.1657
  Support: 105
DEGREE:
  Precision: 0.3600
  Recall: 0.3103
  F1: 0.3333
  Support: 29
DESIGNATION:
  Precision: 0.2308
  Recall: 0.3300
  F1: 0.2716
  Support: 100
EMAIL:
  Precision: 0.5402
  Recall: 0.8704
  F1: 0.6667
  Support: 54
LOCATION:
  Precision: 0.4348
  Recall: 0.5495
  F1: 0.4854
  Support: 91
NAME:
  Precision: 0.7125
  Recall: 0.8906
  F1: 0.7917
  Support: 64
SKILLS:
  Precision: 0.0500
  Recall: 0.0541
  F1: 0.0519
  Support: 37
Train Loss: 0.3152
Val Loss: 0.3829, Accuracy: 0.8867, Precision: 0.3662, Recall: 0.4294, F1: 0.3953
New best model saved with F1: 0.3953

Epoch 17/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.63it/s, loss=0.3129]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.10it/s]



Token-level accuracy: 0.8936

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.1778
  Recall: 0.2667
  F1: 0.2133
  Support: 30
COMPANY:
  Precision: 0.2500
  Recall: 0.1810
  F1: 0.2099
  Support: 105
DEGREE:
  Precision: 0.4138
  Recall: 0.4138
  F1: 0.4138
  Support: 29
DESIGNATION:
  Precision: 0.2593
  Recall: 0.3500
  F1: 0.2979
  Support: 100
EMAIL:
  Precision: 0.4800
  Recall: 0.8889
  F1: 0.6234
  Support: 54
LOCATION:
  Precision: 0.5340
  Recall: 0.6044
  F1: 0.5670
  Support: 91
NAME:
  Precision: 0.7838
  Recall: 0.9062
  F1: 0.8406
  Support: 64
SKILLS:
  Precision: 0.0625
  Recall: 0.0541
  F1: 0.0580
  Support: 37
Train Loss: 0.2852
Val Loss: 0.3687, Accuracy: 0.8936, Precision: 0.3990, Recall: 0.4647, F1: 0.4293
New best model saved with F1: 0.4293

Epoch 18/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.62it/s, loss=0.2727]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.07it/s]



Token-level accuracy: 0.9009

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.2000
  Recall: 0.2667
  F1: 0.2286
  Support: 30
COMPANY:
  Precision: 0.2872
  Recall: 0.2571
  F1: 0.2714
  Support: 105
DEGREE:
  Precision: 0.3929
  Recall: 0.3793
  F1: 0.3860
  Support: 29
DESIGNATION:
  Precision: 0.3134
  Recall: 0.4200
  F1: 0.3590
  Support: 100
EMAIL:
  Precision: 0.5319
  Recall: 0.9259
  F1: 0.6757
  Support: 54
LOCATION:
  Precision: 0.5872
  Recall: 0.7033
  F1: 0.6400
  Support: 91
NAME:
  Precision: 0.8143
  Recall: 0.8906
  F1: 0.8507
  Support: 64
SKILLS:
  Precision: 0.0625
  Recall: 0.0541
  F1: 0.0580
  Support: 37
Train Loss: 0.2553
Val Loss: 0.3616, Accuracy: 0.9009, Precision: 0.4343, Recall: 0.5118, F1: 0.4698
New best model saved with F1: 0.4698

Epoch 19/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.63it/s, loss=0.1655]
Evaluating: 100%|██████████| 8/8 [00:03<00:00,  2.56it/s]



Token-level accuracy: 0.9052

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.2045
  Recall: 0.3000
  F1: 0.2432
  Support: 30
COMPANY:
  Precision: 0.3153
  Recall: 0.3333
  F1: 0.3241
  Support: 105
DEGREE:
  Precision: 0.4444
  Recall: 0.4138
  F1: 0.4286
  Support: 29
DESIGNATION:
  Precision: 0.3876
  Recall: 0.5000
  F1: 0.4367
  Support: 100
EMAIL:
  Precision: 0.6173
  Recall: 0.9259
  F1: 0.7407
  Support: 54
LOCATION:
  Precision: 0.6050
  Recall: 0.7912
  F1: 0.6857
  Support: 91
NAME:
  Precision: 0.8406
  Recall: 0.9062
  F1: 0.8722
  Support: 64
SKILLS:
  Precision: 0.1282
  Recall: 0.1351
  F1: 0.1316
  Support: 37
Train Loss: 0.2283
Val Loss: 0.3472, Accuracy: 0.9052, Precision: 0.4701, Recall: 0.5706, F1: 0.5155
New best model saved with F1: 0.5155

Epoch 20/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.64it/s, loss=0.2818]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.92it/s]



Token-level accuracy: 0.9108

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.3077
  Recall: 0.4000
  F1: 0.3478
  Support: 30
COMPANY:
  Precision: 0.3448
  Recall: 0.3810
  F1: 0.3620
  Support: 105
DEGREE:
  Precision: 0.5385
  Recall: 0.4828
  F1: 0.5091
  Support: 29
DESIGNATION:
  Precision: 0.4590
  Recall: 0.5600
  F1: 0.5045
  Support: 100
EMAIL:
  Precision: 0.6538
  Recall: 0.9444
  F1: 0.7727
  Support: 54
LOCATION:
  Precision: 0.6635
  Recall: 0.7582
  F1: 0.7077
  Support: 91
NAME:
  Precision: 0.8571
  Recall: 0.9375
  F1: 0.8955
  Support: 64
SKILLS:
  Precision: 0.1944
  Recall: 0.1892
  F1: 0.1918
  Support: 37
Train Loss: 0.2069
Val Loss: 0.3399, Accuracy: 0.9108, Precision: 0.5228, Recall: 0.6059, F1: 0.5613
New best model saved with F1: 0.5613

Epoch 21/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.64it/s, loss=0.1997]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.09it/s]



Token-level accuracy: 0.9192

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.3784
  Recall: 0.4667
  F1: 0.4179
  Support: 30
COMPANY:
  Precision: 0.3719
  Recall: 0.4286
  F1: 0.3982
  Support: 105
DEGREE:
  Precision: 0.6296
  Recall: 0.5862
  F1: 0.6071
  Support: 29
DESIGNATION:
  Precision: 0.4583
  Recall: 0.5500
  F1: 0.5000
  Support: 100
EMAIL:
  Precision: 0.6071
  Recall: 0.9444
  F1: 0.7391
  Support: 54
LOCATION:
  Precision: 0.6731
  Recall: 0.7692
  F1: 0.7179
  Support: 91
NAME:
  Precision: 0.8696
  Recall: 0.9375
  F1: 0.9023
  Support: 64
SKILLS:
  Precision: 0.2895
  Recall: 0.2973
  F1: 0.2933
  Support: 37
Train Loss: 0.1871
Val Loss: 0.3148, Accuracy: 0.9192, Precision: 0.5383, Recall: 0.6333, F1: 0.5820
New best model saved with F1: 0.5820

Epoch 22/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.64it/s, loss=0.1456]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.12it/s]



Token-level accuracy: 0.9232

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.3333
  Recall: 0.4000
  F1: 0.3636
  Support: 30
COMPANY:
  Precision: 0.3934
  Recall: 0.4571
  F1: 0.4229
  Support: 105
DEGREE:
  Precision: 0.6538
  Recall: 0.5862
  F1: 0.6182
  Support: 29
DESIGNATION:
  Precision: 0.5128
  Recall: 0.6000
  F1: 0.5530
  Support: 100
EMAIL:
  Precision: 0.7353
  Recall: 0.9259
  F1: 0.8197
  Support: 54
LOCATION:
  Precision: 0.6990
  Recall: 0.7912
  F1: 0.7423
  Support: 91
NAME:
  Precision: 0.8696
  Recall: 0.9375
  F1: 0.9023
  Support: 64
SKILLS:
  Precision: 0.2368
  Recall: 0.2432
  F1: 0.2400
  Support: 37
Train Loss: 0.1704
Val Loss: 0.3052, Accuracy: 0.9232, Precision: 0.5665, Recall: 0.6431, F1: 0.6024
New best model saved with F1: 0.6024

Epoch 23/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.62it/s, loss=0.0887]
Evaluating: 100%|██████████| 8/8 [00:03<00:00,  2.62it/s]



Token-level accuracy: 0.9302

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.3846
  Recall: 0.5000
  F1: 0.4348
  Support: 30
COMPANY:
  Precision: 0.3788
  Recall: 0.4762
  F1: 0.4219
  Support: 105
DEGREE:
  Precision: 0.6667
  Recall: 0.6207
  F1: 0.6429
  Support: 29
DESIGNATION:
  Precision: 0.5333
  Recall: 0.6400
  F1: 0.5818
  Support: 100
EMAIL:
  Precision: 0.6800
  Recall: 0.9444
  F1: 0.7907
  Support: 54
LOCATION:
  Precision: 0.7170
  Recall: 0.8352
  F1: 0.7716
  Support: 91
NAME:
  Precision: 0.8841
  Recall: 0.9531
  F1: 0.9173
  Support: 64
SKILLS:
  Precision: 0.2727
  Recall: 0.2432
  F1: 0.2571
  Support: 37
Train Loss: 0.1592
Val Loss: 0.3026, Accuracy: 0.9302, Precision: 0.5724, Recall: 0.6745, F1: 0.6193
New best model saved with F1: 0.6193

Epoch 24/100


Training: 100%|██████████| 112/112 [00:41<00:00,  2.67it/s, loss=0.1217]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.09it/s]



Token-level accuracy: 0.9254

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.4359
  Recall: 0.5667
  F1: 0.4928
  Support: 30
COMPANY:
  Precision: 0.3817
  Recall: 0.4762
  F1: 0.4237
  Support: 105
DEGREE:
  Precision: 0.8148
  Recall: 0.7586
  F1: 0.7857
  Support: 29
DESIGNATION:
  Precision: 0.5041
  Recall: 0.6100
  F1: 0.5520
  Support: 100
EMAIL:
  Precision: 0.7183
  Recall: 0.9444
  F1: 0.8160
  Support: 54
LOCATION:
  Precision: 0.7212
  Recall: 0.8242
  F1: 0.7692
  Support: 91
NAME:
  Precision: 0.8841
  Recall: 0.9531
  F1: 0.9173
  Support: 64
SKILLS:
  Precision: 0.3571
  Recall: 0.2703
  F1: 0.3077
  Support: 37
Train Loss: 0.1399
Val Loss: 0.3211, Accuracy: 0.9254, Precision: 0.5881, Recall: 0.6804, F1: 0.6309
New best model saved with F1: 0.6309

Epoch 25/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.64it/s, loss=0.1269]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.10it/s]



Token-level accuracy: 0.9245

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.4737
  Recall: 0.6000
  F1: 0.5294
  Support: 30
COMPANY:
  Precision: 0.4309
  Recall: 0.5048
  F1: 0.4649
  Support: 105
DEGREE:
  Precision: 0.8621
  Recall: 0.8621
  F1: 0.8621
  Support: 29
DESIGNATION:
  Precision: 0.5083
  Recall: 0.6100
  F1: 0.5545
  Support: 100
EMAIL:
  Precision: 0.6190
  Recall: 0.9630
  F1: 0.7536
  Support: 54
LOCATION:
  Precision: 0.6786
  Recall: 0.8352
  F1: 0.7488
  Support: 91
NAME:
  Precision: 0.8841
  Recall: 0.9531
  F1: 0.9173
  Support: 64
SKILLS:
  Precision: 0.3000
  Recall: 0.2432
  F1: 0.2687
  Support: 37
Train Loss: 0.1270
Val Loss: 0.3521, Accuracy: 0.9245, Precision: 0.5868, Recall: 0.6961, F1: 0.6368
New best model saved with F1: 0.6368

Epoch 26/100


Training: 100%|██████████| 112/112 [00:43<00:00,  2.60it/s, loss=0.1007]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.11it/s]



Token-level accuracy: 0.9391

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.4737
  Recall: 0.6000
  F1: 0.5294
  Support: 30
COMPANY:
  Precision: 0.4500
  Recall: 0.5143
  F1: 0.4800
  Support: 105
DEGREE:
  Precision: 0.8889
  Recall: 0.8276
  F1: 0.8571
  Support: 29
DESIGNATION:
  Precision: 0.5294
  Recall: 0.6300
  F1: 0.5753
  Support: 100
EMAIL:
  Precision: 0.8000
  Recall: 0.9630
  F1: 0.8739
  Support: 54
LOCATION:
  Precision: 0.7917
  Recall: 0.8352
  F1: 0.8128
  Support: 91
NAME:
  Precision: 0.9254
  Recall: 0.9688
  F1: 0.9466
  Support: 64
SKILLS:
  Precision: 0.2903
  Recall: 0.2432
  F1: 0.2647
  Support: 37
Train Loss: 0.1138
Val Loss: 0.2918, Accuracy: 0.9391, Precision: 0.6359, Recall: 0.7020, F1: 0.6673
New best model saved with F1: 0.6673

Epoch 27/100


Training: 100%|██████████| 112/112 [00:41<00:00,  2.68it/s, loss=0.2097]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.74it/s]



Token-level accuracy: 0.9278

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.4865
  Recall: 0.6000
  F1: 0.5373
  Support: 30
COMPANY:
  Precision: 0.4597
  Recall: 0.5429
  F1: 0.4978
  Support: 105
DEGREE:
  Precision: 0.8929
  Recall: 0.8621
  F1: 0.8772
  Support: 29
DESIGNATION:
  Precision: 0.5403
  Recall: 0.6700
  F1: 0.5982
  Support: 100
EMAIL:
  Precision: 0.6582
  Recall: 0.9630
  F1: 0.7820
  Support: 54
LOCATION:
  Precision: 0.7624
  Recall: 0.8462
  F1: 0.8021
  Support: 91
NAME:
  Precision: 0.9118
  Recall: 0.9688
  F1: 0.9394
  Support: 64
SKILLS:
  Precision: 0.2432
  Recall: 0.2432
  F1: 0.2432
  Support: 37
Train Loss: 0.1009
Val Loss: 0.3435, Accuracy: 0.9278, Precision: 0.6137, Recall: 0.7196, F1: 0.6625
No improvement for 1 epochs

Epoch 28/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.63it/s, loss=0.0835]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.88it/s]



Token-level accuracy: 0.9395

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5135
  Recall: 0.6333
  F1: 0.5672
  Support: 30
COMPANY:
  Precision: 0.4701
  Recall: 0.5238
  F1: 0.4955
  Support: 105
DEGREE:
  Precision: 0.8621
  Recall: 0.8621
  F1: 0.8621
  Support: 29
DESIGNATION:
  Precision: 0.5902
  Recall: 0.7200
  F1: 0.6486
  Support: 100
EMAIL:
  Precision: 0.7612
  Recall: 0.9444
  F1: 0.8430
  Support: 54
LOCATION:
  Precision: 0.7642
  Recall: 0.8901
  F1: 0.8223
  Support: 91
NAME:
  Precision: 0.9254
  Recall: 0.9688
  F1: 0.9466
  Support: 64
SKILLS:
  Precision: 0.3000
  Recall: 0.2432
  F1: 0.2687
  Support: 37
Train Loss: 0.0903
Val Loss: 0.3049, Accuracy: 0.9395, Precision: 0.6504, Recall: 0.7333, F1: 0.6894
New best model saved with F1: 0.6894

Epoch 29/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.62it/s, loss=0.0648]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.86it/s]



Token-level accuracy: 0.9291

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.4872
  Recall: 0.6333
  F1: 0.5507
  Support: 30
COMPANY:
  Precision: 0.4783
  Recall: 0.5238
  F1: 0.5000
  Support: 105
DEGREE:
  Precision: 0.8125
  Recall: 0.8966
  F1: 0.8525
  Support: 29
DESIGNATION:
  Precision: 0.5772
  Recall: 0.7100
  F1: 0.6368
  Support: 100
EMAIL:
  Precision: 0.7222
  Recall: 0.9630
  F1: 0.8254
  Support: 54
LOCATION:
  Precision: 0.7570
  Recall: 0.8901
  F1: 0.8182
  Support: 91
NAME:
  Precision: 0.9118
  Recall: 0.9688
  F1: 0.9394
  Support: 64
SKILLS:
  Precision: 0.2812
  Recall: 0.2432
  F1: 0.2609
  Support: 37
Train Loss: 0.0837
Val Loss: 0.3592, Accuracy: 0.9291, Precision: 0.6378, Recall: 0.7353, F1: 0.6831
No improvement for 1 epochs

Epoch 30/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.67it/s, loss=0.0613]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.95it/s]



Token-level accuracy: 0.9415

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5833
  Recall: 0.7000
  F1: 0.6364
  Support: 30
COMPANY:
  Precision: 0.5221
  Recall: 0.5619
  F1: 0.5413
  Support: 105
DEGREE:
  Precision: 0.8667
  Recall: 0.8966
  F1: 0.8814
  Support: 29
DESIGNATION:
  Precision: 0.5806
  Recall: 0.7200
  F1: 0.6429
  Support: 100
EMAIL:
  Precision: 0.7536
  Recall: 0.9630
  F1: 0.8455
  Support: 54
LOCATION:
  Precision: 0.7822
  Recall: 0.8681
  F1: 0.8229
  Support: 91
NAME:
  Precision: 0.9254
  Recall: 0.9688
  F1: 0.9466
  Support: 64
SKILLS:
  Precision: 0.3636
  Recall: 0.3243
  F1: 0.3429
  Support: 37
Train Loss: 0.0743
Val Loss: 0.2909, Accuracy: 0.9415, Precision: 0.6684, Recall: 0.7510, F1: 0.7073
New best model saved with F1: 0.7073

Epoch 31/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.61it/s, loss=0.0745]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.96it/s]



Token-level accuracy: 0.9398

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5135
  Recall: 0.6333
  F1: 0.5672
  Support: 30
COMPANY:
  Precision: 0.5500
  Recall: 0.5238
  F1: 0.5366
  Support: 105
DEGREE:
  Precision: 0.8667
  Recall: 0.8966
  F1: 0.8814
  Support: 29
DESIGNATION:
  Precision: 0.5917
  Recall: 0.7100
  F1: 0.6455
  Support: 100
EMAIL:
  Precision: 0.6974
  Recall: 0.9815
  F1: 0.8154
  Support: 54
LOCATION:
  Precision: 0.7921
  Recall: 0.8791
  F1: 0.8333
  Support: 91
NAME:
  Precision: 0.9254
  Recall: 0.9688
  F1: 0.9466
  Support: 64
SKILLS:
  Precision: 0.2195
  Recall: 0.2432
  F1: 0.2308
  Support: 37
Train Loss: 0.0684
Val Loss: 0.3152, Accuracy: 0.9398, Precision: 0.6556, Recall: 0.7353, F1: 0.6932
No improvement for 1 epochs

Epoch 32/100


Training: 100%|██████████| 112/112 [00:41<00:00,  2.67it/s, loss=0.0348]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.93it/s]



Token-level accuracy: 0.9435

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5833
  Recall: 0.7000
  F1: 0.6364
  Support: 30
COMPANY:
  Precision: 0.5333
  Recall: 0.5333
  F1: 0.5333
  Support: 105
DEGREE:
  Precision: 0.7500
  Recall: 0.8276
  F1: 0.7869
  Support: 29
DESIGNATION:
  Precision: 0.6239
  Recall: 0.7300
  F1: 0.6728
  Support: 100
EMAIL:
  Precision: 0.7536
  Recall: 0.9630
  F1: 0.8455
  Support: 54
LOCATION:
  Precision: 0.8163
  Recall: 0.8791
  F1: 0.8466
  Support: 91
NAME:
  Precision: 0.9104
  Recall: 0.9531
  F1: 0.9313
  Support: 64
SKILLS:
  Precision: 0.2571
  Recall: 0.2432
  F1: 0.2500
  Support: 37
Train Loss: 0.0596
Val Loss: 0.2882, Accuracy: 0.9435, Precision: 0.6726, Recall: 0.7373, F1: 0.7035
No improvement for 2 epochs

Epoch 33/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.62it/s, loss=0.0443]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.13it/s]



Token-level accuracy: 0.9411

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5833
  Recall: 0.7000
  F1: 0.6364
  Support: 30
COMPANY:
  Precision: 0.5842
  Recall: 0.5619
  F1: 0.5728
  Support: 105
DEGREE:
  Precision: 0.8438
  Recall: 0.9310
  F1: 0.8852
  Support: 29
DESIGNATION:
  Precision: 0.6496
  Recall: 0.7600
  F1: 0.7005
  Support: 100
EMAIL:
  Precision: 0.7910
  Recall: 0.9815
  F1: 0.8760
  Support: 54
LOCATION:
  Precision: 0.8723
  Recall: 0.9011
  F1: 0.8865
  Support: 91
NAME:
  Precision: 0.9254
  Recall: 0.9688
  F1: 0.9466
  Support: 64
SKILLS:
  Precision: 0.2143
  Recall: 0.2432
  F1: 0.2278
  Support: 37
Train Loss: 0.0550
Val Loss: 0.3116, Accuracy: 0.9411, Precision: 0.6996, Recall: 0.7627, F1: 0.7298
New best model saved with F1: 0.7298

Epoch 34/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.63it/s, loss=0.0154]
Evaluating: 100%|██████████| 8/8 [00:03<00:00,  2.56it/s]



Token-level accuracy: 0.9420

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5385
  Recall: 0.7000
  F1: 0.6087
  Support: 30
COMPANY:
  Precision: 0.5727
  Recall: 0.6000
  F1: 0.5860
  Support: 105
DEGREE:
  Precision: 0.7879
  Recall: 0.8966
  F1: 0.8387
  Support: 29
DESIGNATION:
  Precision: 0.6387
  Recall: 0.7600
  F1: 0.6941
  Support: 100
EMAIL:
  Precision: 0.7361
  Recall: 0.9815
  F1: 0.8413
  Support: 54
LOCATION:
  Precision: 0.8218
  Recall: 0.9121
  F1: 0.8646
  Support: 91
NAME:
  Precision: 0.9104
  Recall: 0.9531
  F1: 0.9313
  Support: 64
SKILLS:
  Precision: 0.3429
  Recall: 0.3243
  F1: 0.3333
  Support: 37
Train Loss: 0.0477
Val Loss: 0.3023, Accuracy: 0.9420, Precision: 0.6858, Recall: 0.7745, F1: 0.7274
No improvement for 1 epochs

Epoch 35/100


Training: 100%|██████████| 112/112 [00:41<00:00,  2.67it/s, loss=0.0456]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.14it/s]



Token-level accuracy: 0.9435

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5556
  Recall: 0.6667
  F1: 0.6061
  Support: 30
COMPANY:
  Precision: 0.5882
  Recall: 0.5714
  F1: 0.5797
  Support: 105
DEGREE:
  Precision: 0.8710
  Recall: 0.9310
  F1: 0.9000
  Support: 29
DESIGNATION:
  Precision: 0.7170
  Recall: 0.7600
  F1: 0.7379
  Support: 100
EMAIL:
  Precision: 0.7794
  Recall: 0.9815
  F1: 0.8689
  Support: 54
LOCATION:
  Precision: 0.8247
  Recall: 0.8791
  F1: 0.8511
  Support: 91
NAME:
  Precision: 0.9254
  Recall: 0.9688
  F1: 0.9466
  Support: 64
SKILLS:
  Precision: 0.3333
  Recall: 0.3784
  F1: 0.3544
  Support: 37
Train Loss: 0.0462
Val Loss: 0.2939, Accuracy: 0.9435, Precision: 0.7140, Recall: 0.7686, F1: 0.7403
New best model saved with F1: 0.7403

Epoch 36/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.62it/s, loss=0.0288]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.70it/s]



Token-level accuracy: 0.9478

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5526
  Recall: 0.7000
  F1: 0.6176
  Support: 30
COMPANY:
  Precision: 0.5794
  Recall: 0.5905
  F1: 0.5849
  Support: 105
DEGREE:
  Precision: 0.8710
  Recall: 0.9310
  F1: 0.9000
  Support: 29
DESIGNATION:
  Precision: 0.6991
  Recall: 0.7900
  F1: 0.7418
  Support: 100
EMAIL:
  Precision: 0.8030
  Recall: 0.9815
  F1: 0.8833
  Support: 54
LOCATION:
  Precision: 0.8485
  Recall: 0.9231
  F1: 0.8842
  Support: 91
NAME:
  Precision: 0.8971
  Recall: 0.9531
  F1: 0.9242
  Support: 64
SKILLS:
  Precision: 0.4054
  Recall: 0.4054
  F1: 0.4054
  Support: 37
Train Loss: 0.0423
Val Loss: 0.2987, Accuracy: 0.9478, Precision: 0.7191, Recall: 0.7882, F1: 0.7521
New best model saved with F1: 0.7521

Epoch 37/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.63it/s, loss=0.0194]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.08it/s]



Token-level accuracy: 0.9439

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5946
  Recall: 0.7333
  F1: 0.6567
  Support: 30
COMPANY:
  Precision: 0.6813
  Recall: 0.5905
  F1: 0.6327
  Support: 105
DEGREE:
  Precision: 0.8750
  Recall: 0.9655
  F1: 0.9180
  Support: 29
DESIGNATION:
  Precision: 0.7500
  Recall: 0.7800
  F1: 0.7647
  Support: 100
EMAIL:
  Precision: 0.7794
  Recall: 0.9815
  F1: 0.8689
  Support: 54
LOCATION:
  Precision: 0.8526
  Recall: 0.8901
  F1: 0.8710
  Support: 91
NAME:
  Precision: 0.9254
  Recall: 0.9688
  F1: 0.9466
  Support: 64
SKILLS:
  Precision: 0.4375
  Recall: 0.3784
  F1: 0.4058
  Support: 37
Train Loss: 0.0396
Val Loss: 0.3296, Accuracy: 0.9439, Precision: 0.7605, Recall: 0.7843, F1: 0.7722
New best model saved with F1: 0.7722

Epoch 38/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.62it/s, loss=0.0178]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.11it/s]



Token-level accuracy: 0.9483

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5641
  Recall: 0.7333
  F1: 0.6377
  Support: 30
COMPANY:
  Precision: 0.6596
  Recall: 0.5905
  F1: 0.6231
  Support: 105
DEGREE:
  Precision: 0.8750
  Recall: 0.9655
  F1: 0.9180
  Support: 29
DESIGNATION:
  Precision: 0.7212
  Recall: 0.7500
  F1: 0.7353
  Support: 100
EMAIL:
  Precision: 0.8030
  Recall: 0.9815
  F1: 0.8833
  Support: 54
LOCATION:
  Precision: 0.8557
  Recall: 0.9121
  F1: 0.8830
  Support: 91
NAME:
  Precision: 0.9254
  Recall: 0.9688
  F1: 0.9466
  Support: 64
SKILLS:
  Precision: 0.3947
  Recall: 0.4054
  F1: 0.4000
  Support: 37
Train Loss: 0.0374
Val Loss: 0.2961, Accuracy: 0.9483, Precision: 0.7449, Recall: 0.7843, F1: 0.7641
No improvement for 1 epochs

Epoch 39/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.65it/s, loss=0.0300]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.12it/s]



Token-level accuracy: 0.9566

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6389
  Recall: 0.7667
  F1: 0.6970
  Support: 30
COMPANY:
  Precision: 0.6458
  Recall: 0.5905
  F1: 0.6169
  Support: 105
DEGREE:
  Precision: 0.8750
  Recall: 0.9655
  F1: 0.9180
  Support: 29
DESIGNATION:
  Precision: 0.7075
  Recall: 0.7500
  F1: 0.7282
  Support: 100
EMAIL:
  Precision: 0.7794
  Recall: 0.9815
  F1: 0.8689
  Support: 54
LOCATION:
  Precision: 0.8750
  Recall: 0.9231
  F1: 0.8984
  Support: 91
NAME:
  Precision: 0.9254
  Recall: 0.9688
  F1: 0.9466
  Support: 64
SKILLS:
  Precision: 0.3824
  Recall: 0.3514
  F1: 0.3662
  Support: 37
Train Loss: 0.0337
Val Loss: 0.2568, Accuracy: 0.9566, Precision: 0.7477, Recall: 0.7843, F1: 0.7656
No improvement for 2 epochs

Epoch 40/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.65it/s, loss=0.0138]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.68it/s]



Token-level accuracy: 0.9517

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5897
  Recall: 0.7667
  F1: 0.6667
  Support: 30
COMPANY:
  Precision: 0.6190
  Recall: 0.6190
  F1: 0.6190
  Support: 105
DEGREE:
  Precision: 0.8485
  Recall: 0.9655
  F1: 0.9032
  Support: 29
DESIGNATION:
  Precision: 0.7315
  Recall: 0.7900
  F1: 0.7596
  Support: 100
EMAIL:
  Precision: 0.8281
  Recall: 0.9815
  F1: 0.8983
  Support: 54
LOCATION:
  Precision: 0.8571
  Recall: 0.9231
  F1: 0.8889
  Support: 91
NAME:
  Precision: 0.9118
  Recall: 0.9688
  F1: 0.9394
  Support: 64
SKILLS:
  Precision: 0.4146
  Recall: 0.4595
  F1: 0.4359
  Support: 37
Train Loss: 0.0302
Val Loss: 0.3022, Accuracy: 0.9517, Precision: 0.7392, Recall: 0.8059, F1: 0.7711
No improvement for 3 epochs

Epoch 41/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.65it/s, loss=0.0579]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.11it/s]



Token-level accuracy: 0.9463

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6053
  Recall: 0.7667
  F1: 0.6765
  Support: 30
COMPANY:
  Precision: 0.6923
  Recall: 0.6000
  F1: 0.6429
  Support: 105
DEGREE:
  Precision: 0.8750
  Recall: 0.9655
  F1: 0.9180
  Support: 29
DESIGNATION:
  Precision: 0.7500
  Recall: 0.8100
  F1: 0.7788
  Support: 100
EMAIL:
  Precision: 0.7465
  Recall: 0.9815
  F1: 0.8480
  Support: 54
LOCATION:
  Precision: 0.8750
  Recall: 0.9231
  F1: 0.8984
  Support: 91
NAME:
  Precision: 0.9254
  Recall: 0.9688
  F1: 0.9466
  Support: 64
SKILLS:
  Precision: 0.4839
  Recall: 0.4054
  F1: 0.4412
  Support: 37
Train Loss: 0.0284
Val Loss: 0.3251, Accuracy: 0.9463, Precision: 0.7659, Recall: 0.8020, F1: 0.7835
New best model saved with F1: 0.7835

Epoch 42/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.62it/s, loss=0.0157]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.88it/s]



Token-level accuracy: 0.9554

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5897
  Recall: 0.7667
  F1: 0.6667
  Support: 30
COMPANY:
  Precision: 0.6632
  Recall: 0.6000
  F1: 0.6300
  Support: 105
DEGREE:
  Precision: 0.8750
  Recall: 0.9655
  F1: 0.9180
  Support: 29
DESIGNATION:
  Precision: 0.8039
  Recall: 0.8200
  F1: 0.8119
  Support: 100
EMAIL:
  Precision: 0.8571
  Recall: 1.0000
  F1: 0.9231
  Support: 54
LOCATION:
  Precision: 0.8571
  Recall: 0.9231
  F1: 0.8889
  Support: 91
NAME:
  Precision: 0.9394
  Recall: 0.9688
  F1: 0.9538
  Support: 64
SKILLS:
  Precision: 0.3889
  Recall: 0.3784
  F1: 0.3836
  Support: 37
Train Loss: 0.0247
Val Loss: 0.2877, Accuracy: 0.9554, Precision: 0.7721, Recall: 0.8039, F1: 0.7877
New best model saved with F1: 0.7877

Epoch 43/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.65it/s, loss=0.0262]
Evaluating: 100%|██████████| 8/8 [00:03<00:00,  2.29it/s]



Token-level accuracy: 0.9490

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6053
  Recall: 0.7667
  F1: 0.6765
  Support: 30
COMPANY:
  Precision: 0.7000
  Recall: 0.6000
  F1: 0.6462
  Support: 105
DEGREE:
  Precision: 0.9032
  Recall: 0.9655
  F1: 0.9333
  Support: 29
DESIGNATION:
  Precision: 0.8039
  Recall: 0.8200
  F1: 0.8119
  Support: 100
EMAIL:
  Precision: 0.8308
  Recall: 1.0000
  F1: 0.9076
  Support: 54
LOCATION:
  Precision: 0.8750
  Recall: 0.9231
  F1: 0.8984
  Support: 91
NAME:
  Precision: 0.9552
  Recall: 1.0000
  F1: 0.9771
  Support: 64
SKILLS:
  Precision: 0.5000
  Recall: 0.5135
  F1: 0.5067
  Support: 37
Train Loss: 0.0233
Val Loss: 0.3078, Accuracy: 0.9490, Precision: 0.7913, Recall: 0.8176, F1: 0.8042
New best model saved with F1: 0.8042

Epoch 44/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.61it/s, loss=0.0170]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.11it/s]



Token-level accuracy: 0.9478

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5897
  Recall: 0.7667
  F1: 0.6667
  Support: 30
COMPANY:
  Precision: 0.7045
  Recall: 0.5905
  F1: 0.6425
  Support: 105
DEGREE:
  Precision: 0.9032
  Recall: 0.9655
  F1: 0.9333
  Support: 29
DESIGNATION:
  Precision: 0.7941
  Recall: 0.8100
  F1: 0.8020
  Support: 100
EMAIL:
  Precision: 0.7826
  Recall: 1.0000
  F1: 0.8780
  Support: 54
LOCATION:
  Precision: 0.8660
  Recall: 0.9231
  F1: 0.8936
  Support: 91
NAME:
  Precision: 0.9545
  Recall: 0.9844
  F1: 0.9692
  Support: 64
SKILLS:
  Precision: 0.4872
  Recall: 0.5135
  F1: 0.5000
  Support: 37
Train Loss: 0.0234
Val Loss: 0.3136, Accuracy: 0.9478, Precision: 0.7797, Recall: 0.8118, F1: 0.7954
No improvement for 1 epochs

Epoch 45/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.66it/s, loss=0.0212]
Evaluating: 100%|██████████| 8/8 [00:03<00:00,  2.62it/s]



Token-level accuracy: 0.9503

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5750
  Recall: 0.7667
  F1: 0.6571
  Support: 30
COMPANY:
  Precision: 0.7356
  Recall: 0.6095
  F1: 0.6667
  Support: 105
DEGREE:
  Precision: 0.8750
  Recall: 0.9655
  F1: 0.9180
  Support: 29
DESIGNATION:
  Precision: 0.7670
  Recall: 0.7900
  F1: 0.7783
  Support: 100
EMAIL:
  Precision: 0.8308
  Recall: 1.0000
  F1: 0.9076
  Support: 54
LOCATION:
  Precision: 0.8557
  Recall: 0.9121
  F1: 0.8830
  Support: 91
NAME:
  Precision: 0.9697
  Recall: 1.0000
  F1: 0.9846
  Support: 64
SKILLS:
  Precision: 0.5116
  Recall: 0.5946
  F1: 0.5500
  Support: 37
Train Loss: 0.0193
Val Loss: 0.3145, Accuracy: 0.9503, Precision: 0.7824, Recall: 0.8176, F1: 0.7996
No improvement for 2 epochs

Epoch 46/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.63it/s, loss=0.0247]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.11it/s]



Token-level accuracy: 0.9502

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5750
  Recall: 0.7667
  F1: 0.6571
  Support: 30
COMPANY:
  Precision: 0.7065
  Recall: 0.6190
  F1: 0.6599
  Support: 105
DEGREE:
  Precision: 0.9032
  Recall: 0.9655
  F1: 0.9333
  Support: 29
DESIGNATION:
  Precision: 0.8100
  Recall: 0.8100
  F1: 0.8100
  Support: 100
EMAIL:
  Precision: 0.8308
  Recall: 1.0000
  F1: 0.9076
  Support: 54
LOCATION:
  Precision: 0.8660
  Recall: 0.9231
  F1: 0.8936
  Support: 91
NAME:
  Precision: 0.9552
  Recall: 1.0000
  F1: 0.9771
  Support: 64
SKILLS:
  Precision: 0.5116
  Recall: 0.5946
  F1: 0.5500
  Support: 37
Train Loss: 0.0200
Val Loss: 0.2990, Accuracy: 0.9502, Precision: 0.7869, Recall: 0.8255, F1: 0.8057
New best model saved with F1: 0.8057

Epoch 47/100


Training: 100%|██████████| 112/112 [00:43<00:00,  2.60it/s, loss=0.0143]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.11it/s]



Token-level accuracy: 0.9488

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5897
  Recall: 0.7667
  F1: 0.6667
  Support: 30
COMPANY:
  Precision: 0.7283
  Recall: 0.6381
  F1: 0.6802
  Support: 105
DEGREE:
  Precision: 0.8485
  Recall: 0.9655
  F1: 0.9032
  Support: 29
DESIGNATION:
  Precision: 0.7788
  Recall: 0.8100
  F1: 0.7941
  Support: 100
EMAIL:
  Precision: 0.8308
  Recall: 1.0000
  F1: 0.9076
  Support: 54
LOCATION:
  Precision: 0.8750
  Recall: 0.9231
  F1: 0.8984
  Support: 91
NAME:
  Precision: 0.9545
  Recall: 0.9844
  F1: 0.9692
  Support: 64
SKILLS:
  Precision: 0.4091
  Recall: 0.4865
  F1: 0.4444
  Support: 37
Train Loss: 0.0224
Val Loss: 0.3274, Accuracy: 0.9488, Precision: 0.7755, Recall: 0.8196, F1: 0.7969
No improvement for 1 epochs

Epoch 48/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.66it/s, loss=0.0205]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.05it/s]



Token-level accuracy: 0.9484

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5750
  Recall: 0.7667
  F1: 0.6571
  Support: 30
COMPANY:
  Precision: 0.7033
  Recall: 0.6095
  F1: 0.6531
  Support: 105
DEGREE:
  Precision: 0.8485
  Recall: 0.9655
  F1: 0.9032
  Support: 29
DESIGNATION:
  Precision: 0.7757
  Recall: 0.8300
  F1: 0.8019
  Support: 100
EMAIL:
  Precision: 0.8060
  Recall: 1.0000
  F1: 0.8926
  Support: 54
LOCATION:
  Precision: 0.8571
  Recall: 0.9231
  F1: 0.8889
  Support: 91
NAME:
  Precision: 0.9545
  Recall: 0.9844
  F1: 0.9692
  Support: 64
SKILLS:
  Precision: 0.5476
  Recall: 0.6216
  F1: 0.5823
  Support: 37
Train Loss: 0.0163
Val Loss: 0.3400, Accuracy: 0.9484, Precision: 0.7757, Recall: 0.8275, F1: 0.8008
No improvement for 2 epochs

Epoch 49/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.62it/s, loss=0.0164]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.67it/s]



Token-level accuracy: 0.9527

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6053
  Recall: 0.7667
  F1: 0.6765
  Support: 30
COMPANY:
  Precision: 0.7021
  Recall: 0.6286
  F1: 0.6633
  Support: 105
DEGREE:
  Precision: 0.9032
  Recall: 0.9655
  F1: 0.9333
  Support: 29
DESIGNATION:
  Precision: 0.7297
  Recall: 0.8100
  F1: 0.7678
  Support: 100
EMAIL:
  Precision: 0.7397
  Recall: 1.0000
  F1: 0.8504
  Support: 54
LOCATION:
  Precision: 0.8571
  Recall: 0.9231
  F1: 0.8889
  Support: 91
NAME:
  Precision: 0.9552
  Recall: 1.0000
  F1: 0.9771
  Support: 64
SKILLS:
  Precision: 0.5854
  Recall: 0.6486
  F1: 0.6154
  Support: 37
Train Loss: 0.0150
Val Loss: 0.3063, Accuracy: 0.9527, Precision: 0.7667, Recall: 0.8314, F1: 0.7977
No improvement for 3 epochs

Epoch 50/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.65it/s, loss=0.0123]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.11it/s]



Token-level accuracy: 0.9492

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6053
  Recall: 0.7667
  F1: 0.6765
  Support: 30
COMPANY:
  Precision: 0.6989
  Recall: 0.6190
  F1: 0.6566
  Support: 105
DEGREE:
  Precision: 0.9032
  Recall: 0.9655
  F1: 0.9333
  Support: 29
DESIGNATION:
  Precision: 0.7864
  Recall: 0.8100
  F1: 0.7980
  Support: 100
EMAIL:
  Precision: 0.8182
  Recall: 1.0000
  F1: 0.9000
  Support: 54
LOCATION:
  Precision: 0.8400
  Recall: 0.9231
  F1: 0.8796
  Support: 91
NAME:
  Precision: 0.9697
  Recall: 1.0000
  F1: 0.9846
  Support: 64
SKILLS:
  Precision: 0.5349
  Recall: 0.6216
  F1: 0.5750
  Support: 37
Train Loss: 0.0173
Val Loss: 0.3321, Accuracy: 0.9492, Precision: 0.7815, Recall: 0.8275, F1: 0.8038
No improvement for 4 epochs

Epoch 51/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.63it/s, loss=0.0060]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.10it/s]



Token-level accuracy: 0.9496

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5897
  Recall: 0.7667
  F1: 0.6667
  Support: 30
COMPANY:
  Precision: 0.7416
  Recall: 0.6286
  F1: 0.6804
  Support: 105
DEGREE:
  Precision: 0.9032
  Recall: 0.9655
  F1: 0.9333
  Support: 29
DESIGNATION:
  Precision: 0.8283
  Recall: 0.8200
  F1: 0.8241
  Support: 100
EMAIL:
  Precision: 0.8308
  Recall: 1.0000
  F1: 0.9076
  Support: 54
LOCATION:
  Precision: 0.8660
  Recall: 0.9231
  F1: 0.8936
  Support: 91
NAME:
  Precision: 0.9545
  Recall: 0.9844
  F1: 0.9692
  Support: 64
SKILLS:
  Precision: 0.5750
  Recall: 0.6216
  F1: 0.5974
  Support: 37
Train Loss: 0.0147
Val Loss: 0.3212, Accuracy: 0.9496, Precision: 0.8042, Recall: 0.8294, F1: 0.8166
New best model saved with F1: 0.8166

Epoch 52/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.62it/s, loss=0.0213]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.06it/s]



Token-level accuracy: 0.9494

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6053
  Recall: 0.7667
  F1: 0.6765
  Support: 30
COMPANY:
  Precision: 0.7416
  Recall: 0.6286
  F1: 0.6804
  Support: 105
DEGREE:
  Precision: 0.8750
  Recall: 0.9655
  F1: 0.9180
  Support: 29
DESIGNATION:
  Precision: 0.8020
  Recall: 0.8100
  F1: 0.8060
  Support: 100
EMAIL:
  Precision: 0.8308
  Recall: 1.0000
  F1: 0.9076
  Support: 54
LOCATION:
  Precision: 0.8750
  Recall: 0.9231
  F1: 0.8984
  Support: 91
NAME:
  Precision: 0.9545
  Recall: 0.9844
  F1: 0.9692
  Support: 64
SKILLS:
  Precision: 0.6098
  Recall: 0.6757
  F1: 0.6410
  Support: 37
Train Loss: 0.0137
Val Loss: 0.3139, Accuracy: 0.9494, Precision: 0.8030, Recall: 0.8314, F1: 0.8170
New best model saved with F1: 0.8170

Epoch 53/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.65it/s, loss=0.0057]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.09it/s]



Token-level accuracy: 0.9551

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5750
  Recall: 0.7667
  F1: 0.6571
  Support: 30
COMPANY:
  Precision: 0.7419
  Recall: 0.6571
  F1: 0.6970
  Support: 105
DEGREE:
  Precision: 0.9032
  Recall: 0.9655
  F1: 0.9333
  Support: 29
DESIGNATION:
  Precision: 0.7523
  Recall: 0.8200
  F1: 0.7847
  Support: 100
EMAIL:
  Precision: 0.8308
  Recall: 1.0000
  F1: 0.9076
  Support: 54
LOCATION:
  Precision: 0.8750
  Recall: 0.9231
  F1: 0.8984
  Support: 91
NAME:
  Precision: 0.9552
  Recall: 1.0000
  F1: 0.9771
  Support: 64
SKILLS:
  Precision: 0.5682
  Recall: 0.6757
  F1: 0.6173
  Support: 37
Train Loss: 0.0127
Val Loss: 0.3152, Accuracy: 0.9551, Precision: 0.7872, Recall: 0.8412, F1: 0.8133
No improvement for 1 epochs

Epoch 54/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.62it/s, loss=0.0099]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.12it/s]



Token-level accuracy: 0.9496

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5750
  Recall: 0.7667
  F1: 0.6571
  Support: 30
COMPANY:
  Precision: 0.7473
  Recall: 0.6476
  F1: 0.6939
  Support: 105
DEGREE:
  Precision: 0.9032
  Recall: 0.9655
  F1: 0.9333
  Support: 29
DESIGNATION:
  Precision: 0.8119
  Recall: 0.8200
  F1: 0.8159
  Support: 100
EMAIL:
  Precision: 0.8308
  Recall: 1.0000
  F1: 0.9076
  Support: 54
LOCATION:
  Precision: 0.8750
  Recall: 0.9231
  F1: 0.8984
  Support: 91
NAME:
  Precision: 0.9697
  Recall: 1.0000
  F1: 0.9846
  Support: 64
SKILLS:
  Precision: 0.5854
  Recall: 0.6486
  F1: 0.6154
  Support: 37
Train Loss: 0.0119
Val Loss: 0.3275, Accuracy: 0.9496, Precision: 0.8041, Recall: 0.8373, F1: 0.8204
New best model saved with F1: 0.8204

Epoch 55/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.63it/s, loss=0.0047]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.12it/s]



Token-level accuracy: 0.9548

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5897
  Recall: 0.7667
  F1: 0.6667
  Support: 30
COMPANY:
  Precision: 0.7753
  Recall: 0.6571
  F1: 0.7113
  Support: 105
DEGREE:
  Precision: 0.9032
  Recall: 0.9655
  F1: 0.9333
  Support: 29
DESIGNATION:
  Precision: 0.8317
  Recall: 0.8400
  F1: 0.8358
  Support: 100
EMAIL:
  Precision: 0.8571
  Recall: 1.0000
  F1: 0.9231
  Support: 54
LOCATION:
  Precision: 0.8660
  Recall: 0.9231
  F1: 0.8936
  Support: 91
NAME:
  Precision: 0.9697
  Recall: 1.0000
  F1: 0.9846
  Support: 64
SKILLS:
  Precision: 0.5581
  Recall: 0.6486
  F1: 0.6000
  Support: 37
Train Loss: 0.0121
Val Loss: 0.3306, Accuracy: 0.9548, Precision: 0.8129, Recall: 0.8431, F1: 0.8277
New best model saved with F1: 0.8277

Epoch 56/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.63it/s, loss=0.0108]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.12it/s]



Token-level accuracy: 0.9497

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6053
  Recall: 0.7667
  F1: 0.6765
  Support: 30
COMPANY:
  Precision: 0.7753
  Recall: 0.6571
  F1: 0.7113
  Support: 105
DEGREE:
  Precision: 0.9333
  Recall: 0.9655
  F1: 0.9492
  Support: 29
DESIGNATION:
  Precision: 0.8137
  Recall: 0.8300
  F1: 0.8218
  Support: 100
EMAIL:
  Precision: 0.8308
  Recall: 1.0000
  F1: 0.9076
  Support: 54
LOCATION:
  Precision: 0.8155
  Recall: 0.9231
  F1: 0.8660
  Support: 91
NAME:
  Precision: 0.9697
  Recall: 1.0000
  F1: 0.9846
  Support: 64
SKILLS:
  Precision: 0.6316
  Recall: 0.6486
  F1: 0.6400
  Support: 37
Train Loss: 0.0113
Val Loss: 0.3367, Accuracy: 0.9497, Precision: 0.8079, Recall: 0.8412, F1: 0.8242
No improvement for 1 epochs

Epoch 57/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.65it/s, loss=0.0038]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.12it/s]



Token-level accuracy: 0.9480

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6053
  Recall: 0.7667
  F1: 0.6765
  Support: 30
COMPANY:
  Precision: 0.7976
  Recall: 0.6381
  F1: 0.7090
  Support: 105
DEGREE:
  Precision: 0.9333
  Recall: 0.9655
  F1: 0.9492
  Support: 29
DESIGNATION:
  Precision: 0.8218
  Recall: 0.8300
  F1: 0.8259
  Support: 100
EMAIL:
  Precision: 0.7397
  Recall: 1.0000
  F1: 0.8504
  Support: 54
LOCATION:
  Precision: 0.8155
  Recall: 0.9231
  F1: 0.8660
  Support: 91
NAME:
  Precision: 0.9697
  Recall: 1.0000
  F1: 0.9846
  Support: 64
SKILLS:
  Precision: 0.5581
  Recall: 0.6486
  F1: 0.6000
  Support: 37
Train Loss: 0.0104
Val Loss: 0.3446, Accuracy: 0.9480, Precision: 0.7937, Recall: 0.8373, F1: 0.8149
No improvement for 2 epochs

Epoch 58/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.63it/s, loss=0.0045]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.12it/s]



Token-level accuracy: 0.9477

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6389
  Recall: 0.7667
  F1: 0.6970
  Support: 30
COMPANY:
  Precision: 0.7473
  Recall: 0.6476
  F1: 0.6939
  Support: 105
DEGREE:
  Precision: 0.9655
  Recall: 0.9655
  F1: 0.9655
  Support: 29
DESIGNATION:
  Precision: 0.8252
  Recall: 0.8500
  F1: 0.8374
  Support: 100
EMAIL:
  Precision: 0.7941
  Recall: 1.0000
  F1: 0.8852
  Support: 54
LOCATION:
  Precision: 0.8750
  Recall: 0.9231
  F1: 0.8984
  Support: 91
NAME:
  Precision: 0.9697
  Recall: 1.0000
  F1: 0.9846
  Support: 64
SKILLS:
  Precision: 0.4694
  Recall: 0.6216
  F1: 0.5349
  Support: 37
Train Loss: 0.0099
Val Loss: 0.3410, Accuracy: 0.9477, Precision: 0.7974, Recall: 0.8412, F1: 0.8187
No improvement for 3 epochs

Epoch 59/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.65it/s, loss=0.0021]
Evaluating: 100%|██████████| 8/8 [00:03<00:00,  2.35it/s]



Token-level accuracy: 0.9554

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6053
  Recall: 0.7667
  F1: 0.6765
  Support: 30
COMPANY:
  Precision: 0.7791
  Recall: 0.6381
  F1: 0.7016
  Support: 105
DEGREE:
  Precision: 0.9333
  Recall: 0.9655
  F1: 0.9492
  Support: 29
DESIGNATION:
  Precision: 0.8333
  Recall: 0.8500
  F1: 0.8416
  Support: 100
EMAIL:
  Precision: 0.8571
  Recall: 1.0000
  F1: 0.9231
  Support: 54
LOCATION:
  Precision: 0.8750
  Recall: 0.9231
  F1: 0.8984
  Support: 91
NAME:
  Precision: 0.9697
  Recall: 1.0000
  F1: 0.9846
  Support: 64
SKILLS:
  Precision: 0.5581
  Recall: 0.6486
  F1: 0.6000
  Support: 37
Train Loss: 0.0106
Val Loss: 0.3282, Accuracy: 0.9554, Precision: 0.8187, Recall: 0.8412, F1: 0.8298
New best model saved with F1: 0.8298

Epoch 60/100


Training: 100%|██████████| 112/112 [00:43<00:00,  2.60it/s, loss=0.0131]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.13it/s]



Token-level accuracy: 0.9470

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6053
  Recall: 0.7667
  F1: 0.6765
  Support: 30
COMPANY:
  Precision: 0.8000
  Recall: 0.6476
  F1: 0.7158
  Support: 105
DEGREE:
  Precision: 0.9333
  Recall: 0.9655
  F1: 0.9492
  Support: 29
DESIGNATION:
  Precision: 0.8218
  Recall: 0.8300
  F1: 0.8259
  Support: 100
EMAIL:
  Precision: 0.8182
  Recall: 1.0000
  F1: 0.9000
  Support: 54
LOCATION:
  Precision: 0.8750
  Recall: 0.9231
  F1: 0.8984
  Support: 91
NAME:
  Precision: 0.9697
  Recall: 1.0000
  F1: 0.9846
  Support: 64
SKILLS:
  Precision: 0.6000
  Recall: 0.6486
  F1: 0.6234
  Support: 37
Train Loss: 0.0105
Val Loss: 0.3627, Accuracy: 0.9470, Precision: 0.8199, Recall: 0.8392, F1: 0.8295
No improvement for 1 epochs

Epoch 61/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.66it/s, loss=0.0045]
Evaluating: 100%|██████████| 8/8 [00:03<00:00,  2.58it/s]



Token-level accuracy: 0.9566

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6053
  Recall: 0.7667
  F1: 0.6765
  Support: 30
COMPANY:
  Precision: 0.7841
  Recall: 0.6571
  F1: 0.7150
  Support: 105
DEGREE:
  Precision: 0.9333
  Recall: 0.9655
  F1: 0.9492
  Support: 29
DESIGNATION:
  Precision: 0.8333
  Recall: 0.8500
  F1: 0.8416
  Support: 100
EMAIL:
  Precision: 0.8571
  Recall: 1.0000
  F1: 0.9231
  Support: 54
LOCATION:
  Precision: 0.8750
  Recall: 0.9231
  F1: 0.8984
  Support: 91
NAME:
  Precision: 0.9552
  Recall: 1.0000
  F1: 0.9771
  Support: 64
SKILLS:
  Precision: 0.6000
  Recall: 0.6486
  F1: 0.6234
  Support: 37
Train Loss: 0.0089
Val Loss: 0.3279, Accuracy: 0.9566, Precision: 0.8225, Recall: 0.8451, F1: 0.8337
New best model saved with F1: 0.8337

Epoch 62/100


Training: 100%|██████████| 112/112 [00:43<00:00,  2.60it/s, loss=0.0075]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.10it/s]



Token-level accuracy: 0.9472

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6053
  Recall: 0.7667
  F1: 0.6765
  Support: 30
COMPANY:
  Precision: 0.7500
  Recall: 0.6571
  F1: 0.7005
  Support: 105
DEGREE:
  Precision: 0.9032
  Recall: 0.9655
  F1: 0.9333
  Support: 29
DESIGNATION:
  Precision: 0.8300
  Recall: 0.8300
  F1: 0.8300
  Support: 100
EMAIL:
  Precision: 0.7500
  Recall: 1.0000
  F1: 0.8571
  Support: 54
LOCATION:
  Precision: 0.8571
  Recall: 0.9231
  F1: 0.8889
  Support: 91
NAME:
  Precision: 0.9697
  Recall: 1.0000
  F1: 0.9846
  Support: 64
SKILLS:
  Precision: 0.5455
  Recall: 0.6486
  F1: 0.5926
  Support: 37
Train Loss: 0.0087
Val Loss: 0.3558, Accuracy: 0.9472, Precision: 0.7930, Recall: 0.8412, F1: 0.8164
No improvement for 1 epochs

Epoch 63/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.66it/s, loss=0.0022]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.96it/s]



Token-level accuracy: 0.9538

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6053
  Recall: 0.7667
  F1: 0.6765
  Support: 30
COMPANY:
  Precision: 0.7841
  Recall: 0.6571
  F1: 0.7150
  Support: 105
DEGREE:
  Precision: 0.9655
  Recall: 0.9655
  F1: 0.9655
  Support: 29
DESIGNATION:
  Precision: 0.8300
  Recall: 0.8300
  F1: 0.8300
  Support: 100
EMAIL:
  Precision: 0.7397
  Recall: 1.0000
  F1: 0.8504
  Support: 54
LOCATION:
  Precision: 0.8750
  Recall: 0.9231
  F1: 0.8984
  Support: 91
NAME:
  Precision: 0.9552
  Recall: 1.0000
  F1: 0.9771
  Support: 64
SKILLS:
  Precision: 0.5333
  Recall: 0.6486
  F1: 0.5854
  Support: 37
Train Loss: 0.0083
Val Loss: 0.3144, Accuracy: 0.9538, Precision: 0.8004, Recall: 0.8412, F1: 0.8203
No improvement for 2 epochs

Epoch 64/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.62it/s, loss=0.0027]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  2.69it/s]



Token-level accuracy: 0.9492

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6000
  Recall: 0.8000
  F1: 0.6857
  Support: 30
COMPANY:
  Precision: 0.7582
  Recall: 0.6571
  F1: 0.7041
  Support: 105
DEGREE:
  Precision: 0.9062
  Recall: 1.0000
  F1: 0.9508
  Support: 29
DESIGNATION:
  Precision: 0.8000
  Recall: 0.8400
  F1: 0.8195
  Support: 100
EMAIL:
  Precision: 0.8308
  Recall: 1.0000
  F1: 0.9076
  Support: 54
LOCATION:
  Precision: 0.8660
  Recall: 0.9231
  F1: 0.8936
  Support: 91
NAME:
  Precision: 0.9552
  Recall: 1.0000
  F1: 0.9771
  Support: 64
SKILLS:
  Precision: 0.5714
  Recall: 0.6486
  F1: 0.6076
  Support: 37
Train Loss: 0.0084
Val Loss: 0.3313, Accuracy: 0.9492, Precision: 0.8015, Recall: 0.8471, F1: 0.8236
No improvement for 3 epochs

Epoch 65/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.64it/s, loss=0.0067]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.10it/s]



Token-level accuracy: 0.9485

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6316
  Recall: 0.8000
  F1: 0.7059
  Support: 30
COMPANY:
  Precision: 0.7640
  Recall: 0.6476
  F1: 0.7010
  Support: 105
DEGREE:
  Precision: 0.9667
  Recall: 1.0000
  F1: 0.9831
  Support: 29
DESIGNATION:
  Precision: 0.8333
  Recall: 0.8500
  F1: 0.8416
  Support: 100
EMAIL:
  Precision: 0.7606
  Recall: 1.0000
  F1: 0.8640
  Support: 54
LOCATION:
  Precision: 0.8750
  Recall: 0.9231
  F1: 0.8984
  Support: 91
NAME:
  Precision: 0.9254
  Recall: 0.9688
  F1: 0.9466
  Support: 64
SKILLS:
  Precision: 0.5854
  Recall: 0.6486
  F1: 0.6154
  Support: 37
Train Loss: 0.0078
Val Loss: 0.3420, Accuracy: 0.9485, Precision: 0.8052, Recall: 0.8431, F1: 0.8238
No improvement for 4 epochs

Epoch 66/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.63it/s, loss=0.0050]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.11it/s]



Token-level accuracy: 0.9518

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6486
  Recall: 0.8000
  F1: 0.7164
  Support: 30
COMPANY:
  Precision: 0.7738
  Recall: 0.6190
  F1: 0.6878
  Support: 105
DEGREE:
  Precision: 1.0000
  Recall: 1.0000
  F1: 1.0000
  Support: 29
DESIGNATION:
  Precision: 0.8333
  Recall: 0.8500
  F1: 0.8416
  Support: 100
EMAIL:
  Precision: 0.8060
  Recall: 1.0000
  F1: 0.8926
  Support: 54
LOCATION:
  Precision: 0.8936
  Recall: 0.9231
  F1: 0.9081
  Support: 91
NAME:
  Precision: 0.9545
  Recall: 0.9844
  F1: 0.9692
  Support: 64
SKILLS:
  Precision: 0.5581
  Recall: 0.6486
  F1: 0.6000
  Support: 37
Train Loss: 0.0077
Val Loss: 0.3211, Accuracy: 0.9518, Precision: 0.8199, Recall: 0.8392, F1: 0.8295
No improvement for 5 epochs

Epoch 67/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.63it/s, loss=0.0110]
Evaluating: 100%|██████████| 8/8 [00:02<00:00,  3.06it/s]



Token-level accuracy: 0.9458

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6316
  Recall: 0.8000
  F1: 0.7059
  Support: 30
COMPANY:
  Precision: 0.7556
  Recall: 0.6476
  F1: 0.6974
  Support: 105
DEGREE:
  Precision: 0.9355
  Recall: 1.0000
  F1: 0.9667
  Support: 29
DESIGNATION:
  Precision: 0.7798
  Recall: 0.8500
  F1: 0.8134
  Support: 100
EMAIL:
  Precision: 0.7500
  Recall: 1.0000
  F1: 0.8571
  Support: 54
LOCATION:
  Precision: 0.8660
  Recall: 0.9231
  F1: 0.8936
  Support: 91
NAME:
  Precision: 0.9552
  Recall: 1.0000
  F1: 0.9771
  Support: 64
SKILLS:
  Precision: 0.5854
  Recall: 0.6486
  F1: 0.6154
  Support: 37
Train Loss: 0.0074
Val Loss: 0.3604, Accuracy: 0.9458, Precision: 0.7927, Recall: 0.8471, F1: 0.8190
No improvement for 6 epochs

Epoch 68/100


Training: 100%|██████████| 112/112 [00:42<00:00,  2.64it/s, loss=0.0049]
Evaluating: 100%|██████████| 8/8 [00:03<00:00,  2.65it/s]



Token-level accuracy: 0.9507

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6316
  Recall: 0.8000
  F1: 0.7059
  Support: 30
COMPANY:
  Precision: 0.7907
  Recall: 0.6476
  F1: 0.7120
  Support: 105
DEGREE:
  Precision: 0.9667
  Recall: 1.0000
  F1: 0.9831
  Support: 29
DESIGNATION:
  Precision: 0.8365
  Recall: 0.8700
  F1: 0.8529
  Support: 100
EMAIL:
  Precision: 0.8308
  Recall: 1.0000
  F1: 0.9076
  Support: 54
LOCATION:
  Precision: 0.8571
  Recall: 0.9231
  F1: 0.8889
  Support: 91
NAME:
  Precision: 0.9552
  Recall: 1.0000
  F1: 0.9771
  Support: 64
SKILLS:
  Precision: 0.5455
  Recall: 0.6486
  F1: 0.5926
  Support: 37
Train Loss: 0.0075
Val Loss: 0.3429, Accuracy: 0.9507, Precision: 0.8158, Recall: 0.8510, F1: 0.8330
No improvement for 7 epochs
Early stopping triggered


In [28]:
# Change 3: Load best model and evaluate with accuracy on test set
print("\nLoading best model for testing...")
model.load_state_dict(torch.load(best_model_path))
test_loss, test_report, test_precision, test_recall, test_f1, test_accuracy = evaluate(test_dataloader)

print("\nFinal Test Results:")
print(f"Loss: {test_loss:.4f}")
print(f"Accuracy: {test_accuracy:.4f}")
print(f"Precision: {test_precision:.4f}")
print(f"Recall: {test_recall:.4f}")
print(f"F1 Score: {test_f1:.4f}")



Loading best model for testing...


Evaluating: 100%|██████████| 9/9 [00:02<00:00,  3.47it/s]



Token-level accuracy: 0.9615

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5714
  Recall: 0.7407
  F1: 0.6452
  Support: 27
COMPANY:
  Precision: 0.6833
  Recall: 0.7321
  F1: 0.7069
  Support: 112
DEGREE:
  Precision: 0.7083
  Recall: 0.8500
  F1: 0.7727
  Support: 20
DESIGNATION:
  Precision: 0.7920
  Recall: 0.8462
  F1: 0.8182
  Support: 117
EMAIL:
  Precision: 0.8730
  Recall: 0.9821
  F1: 0.9244
  Support: 56
LOCATION:
  Precision: 0.8261
  Recall: 0.9383
  F1: 0.8786
  Support: 81
NAME:
  Precision: 0.9420
  Recall: 0.9848
  F1: 0.9630
  Support: 66
SKILLS:
  Precision: 0.4783
  Recall: 0.6471
  F1: 0.5500
  Support: 34

Final Test Results:
Loss: 0.2369
Accuracy: 0.9615
Precision: 0.7596
Recall: 0.8499
F1 Score: 0.8022


In [29]:
# Change 4: Plot accuracy along with loss
plt.figure(figsize=(12, 10))

# Plot loss
plt.subplot(2, 1, 1)
plt.plot(range(1, len(train_losses) + 1), train_losses, 'b-', label='Training Loss')
plt.plot(range(1, len(val_losses) + 1), val_losses, 'r-', label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

# Plot accuracy
plt.subplot(2, 1, 2)
plt.plot(range(1, len(val_accuracies) + 1), val_accuracies, 'g-', label='Validation Accuracy')
plt.title('Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig('training_metrics.png')
plt.close()

In [30]:
# Improved prediction function with better entity extraction
def predict_entities(text):
    """Predict NER tags for a new text with improved entity extraction"""
    model.eval()

    # Tokenize the text
    tokens = []
    for match in re.finditer(r'\S+', text):
        tokens.append(match.group())

    # Prepare input for the model
    inputs = tokenizer(
        tokens,
        is_split_into_words=True,
        return_offsets_mapping=True,
        padding='max_length',
        truncation=True,
        max_length=MAX_LEN,
        return_tensors='pt'
    )

    # Move inputs to device
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    # Get model predictions
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=2)

    # Convert predictions to labels
    predicted_labels = []
    word_ids = inputs.word_ids(0)  # Batch index 0
    previous_word_idx = None

    for idx, word_idx in enumerate(word_ids):
        if word_idx is None or word_idx == previous_word_idx:
            continue

        if idx < len(predictions[0]):
            predicted_labels.append(id2label[predictions[0, idx].item()])
        else:
            predicted_labels.append('O')

        previous_word_idx = word_idx

    # Truncate predictions if needed
    predicted_labels = predicted_labels[:len(tokens)]

    # Combine tokens and predictions
    result = list(zip(tokens, predicted_labels))

    return result

In [31]:
# Improved entity grouping for better visualization
def group_entities(predictions):
    """Group tokens with the same entity type together with improved boundary detection"""
    entities = {}
    current_entity = None
    current_text = []

    for token, label in predictions:
        if label == 'O':
            if current_entity:
                if current_entity not in entities:
                    entities[current_entity] = []
                entities[current_entity].append(' '.join(current_text))
                current_entity = None
                current_text = []
        elif label.startswith('B-'):
            if current_entity:
                if current_entity not in entities:
                    entities[current_entity] = []
                entities[current_entity].append(' '.join(current_text))
            current_entity = label[2:]  # Remove B- prefix
            current_text = [token]
        elif label.startswith('I-'):
            if current_entity == label[2:]:  # Only append if it's the same entity type
                current_text.append(token)
            else:
                # Start a new entity if the I- tag doesn't match current entity
                if current_entity:
                    if current_entity not in entities:
                        entities[current_entity] = []
                    entities[current_entity].append(' '.join(current_text))
                current_entity = label[2:]
                current_text = [token]

    # Add the last entity if there is one
    if current_entity and current_text:
        if current_entity not in entities:
            entities[current_entity] = []
        entities[current_entity].append(' '.join(current_text))

    return entities

In [36]:
# Example prediction with more comprehensive resume text
example_text = """John Smith
Email: john.smith@example.com
Phone: +1-234-567-8901
LinkedIn: linkedin.com/in/johnsmith

A passionate software engineer with 3 years of experience in full-stack development. Proficient in JavaScript, React, Node.js, and Python.

Education
B.Tech in Computer Science, Indian Institute of Technology Madras, 2018 - 2022
CGPA: 8.7/10

Experience
Software Engineer at Google, Bangalore — Jul 2022 to Present
- Built scalable REST APIs using Node.js and Express
- Led a team of 3 in migrating the dashboard to React

Software Engineering Intern at Microsoft, Hyderabad — May 2021 to Jul 2021
- Developed internal tools using Python and Flask
- Improved application load time by 30%

Projects
Resume Parser using BERT — Built a tool to extract structured information from resumes
E-commerce Web App — MERN stack application with authentication and payment gateway integration

Skills
JavaScript, React, Node.js, Express, Python, Flask, MongoDB, PostgreSQL, Git, Docker
"""

In [37]:
print("\nRunning prediction on comprehensive example:")
print(f"Text: {example_text[:100]}...")
predictions = predict_entities(example_text)


Running prediction on comprehensive example:
Text: John Smith  
Email: john.smith@example.com  
Phone: +1-234-567-8901  
LinkedIn: linkedin.com/in/john...


In [38]:
print("\nPredicted Entities (first 20):")
for token, label in predictions[:20]:
    print(f"{token} -> {label}")


Predicted Entities (first 20):
John -> B-NAME
Smith -> I-NAME
Email: -> O
john.smith@example.com -> B-NAME
Phone: -> I-EMAIL
+1-234-567-8901 -> I-EMAIL
LinkedIn: -> B-EMAIL
linkedin.com/in/johnsmith -> I-EMAIL
A -> O
passionate -> O
software -> O
engineer -> O
with -> O
3 -> O
years -> O
of -> O
experience -> O
in -> O
full-stack -> O
development. -> O


In [39]:
print("\nGrouped Entities:")
grouped = group_entities(predictions)
for entity_type, mentions in grouped.items():
    print(f"{entity_type}: {mentions}")


Grouped Entities:
NAME: ['John Smith', 'john.smith@example.com']
EMAIL: ['Phone: +1-234-567-8901', 'LinkedIn: linkedin.com/in/johnsmith']
SKILLS: ['in', 'JavaScript,', 'React,', 'Node.js, and', 'Python.', 'Node.js', 'Python', 'Flask', 'JavaScript, React,', 'Node.js, Express, Python, Flask, MongoDB, PostgreSQL, Git, Docker']
DEGREE: ['B.Tech in Computer Science,']
COLLEGE NAME: ['Indian Institute of Technology Madras,']
DESIGNATION: ['Software Engineer', 'Software Engineering Intern']
COMPANY: ['Google,', 'Microsoft,']
