In [None]:
!pip install transformers seqeval

Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16162 sha256=14fabbed2d8270ede9051d564cde5796cce45fe82376e5756c30ecadcbca0c11
  Stored in directory: /root/.cache/pip/wheels/bc/92/f0/243288f899c2eacdfa8c5f9aede4c71a9bad0ee26a01dc5ead
Successfully built seqeval
Installing collected packages: seqeval
Successfully installed seqeval-1.2.2


In [None]:


import json
import pandas as pd
import numpy as np
import re
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup
from seqeval.metrics import classification_report, f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
from collections import Counter
warnings.filterwarnings('ignore')

In [None]:
# Set random seeds for reproducibility.
# The augmentation helpers further down draw from Python's built-in `random`
# module (random.random / random.choice / random.sample), so it has to be seeded
# together with NumPy and PyTorch; otherwise the augmented training set differs
# on every run.
import random

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    # Seed every visible CUDA device as well.
    torch.cuda.manual_seed_all(SEED)

In [None]:
# Load the annotated resumes into `data`.
# The file is treated as JSON Lines: every non-blank line is parsed on its own,
# so a malformed record is reported and skipped instead of aborting the load.
print("Loading data from resume_data.json...")

try:
    with open('/content/resume_data.json', 'r', encoding='utf-8') as f:
        file_content = f.read()

    lines = file_content.split('\n')

    data = []
    for line in lines:
        if line.strip():  # Skip empty lines
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError as e:
                # Show the parser message plus the first 50 characters of the bad line.
                print(f"Error parsing JSON: {str(e)} - {line[:50]}...")

    print(f"Successfully loaded {len(data)} examples")

except Exception as e:
    # Any other failure (for example the file cannot be opened) is reported and
    # leaves `data` as an empty list so the following cells can test `if data:`.
    print(f"Error loading file: {str(e)}")
    data = []

Loading data from resume_data.json...
Successfully loaded 430 examples


In [None]:

# Data exploration: print the first record so the annotation layout is visible.
# Each record is read here as a dict with a 'content' string and an 'annotation'
# list; each annotation has a 'label' value and a list of 'points' with 'text'.
if data:
    example = data[0]
    print("\nExample content (truncated to 200 chars):")
    print(example['content'][:200])
    print("\nExample annotations:")
    for annotation in example['annotation']:
        print(f"Label: {annotation['label']}")
        for point in annotation['points']:
            # Only the first 50 characters of each annotated text are shown.
            print(f"  - Text: {point['text'][:50]}...")
else:
    print("No data available. Please check file path and format.")


Example content (truncated to 200 chars):
Abhishek Jha
Application Development Associate - Accenture

Bengaluru, Karnataka - Email me on Indeed: indeed.com/r/Abhishek-Jha/10e7a8cb732bc43a

• To work for an organization which provides me the o

Example annotations:
Label: ['SKILLS']
  - Text: 
• Programming language: C, C++, Java
• Oracle Peo...
Label: ['SKILLS']
  - Text: C (Less than 1 year), Database (Less than 1 year),...
Label: ['COLLEGE NAME']
  - Text: Kendriya Vidyalaya...
Label: ['COLLEGE NAME']
  - Text: Woodbine modern school...
Label: ['COLLEGE NAME']
  - Text: B.v.b college of engineering and technology...
Label: ['DESIGNATION']
  - Text: B.E in Information science and engineering
...
Label: ['COMPANY']
  - Text: Accenture...
Label: ['DESIGNATION']
  - Text: Application Development Associate...
Label: ['EMAIL']
  - Text: Indeed: indeed.com/r/Abhishek-Jha/10e7a8cb732bc43a...
Label: ['LOCATION']
  - Text: Bengaluru...
Label: ['COMPANY']
  - Text: Accenture...
Label: ['DESIGN

In [None]:

# Define unique labels: gather every entity label used by any annotation.
all_labels = set()
for example in data:
    for annotation in example['annotation']:
        # annotation['label'] is iterated item by item by set.update().
        all_labels.update(annotation['label'])

print("\nUnique labels found in the dataset:")
# Sorted so that the label -> id mapping built from this list is deterministic.
unique_labels = sorted(list(all_labels))
print(unique_labels)


Unique labels found in the dataset:
['COLLEGE NAME', 'COMPANY', 'DEGREE', 'DESIGNATION', 'EMAIL', 'LOCATION', 'NAME', 'SKILLS']


In [None]:

# Build the BIO tag set: 'O' for non-entity tokens (index 0), followed by one
# B-<label> tag per entity label and then one I-<label> tag per entity label.
labels = ['O'] + [f'B-{label}' for label in unique_labels] + [f'I-{label}' for label in unique_labels]
# Forward and reverse lookups between tag strings and integer class ids.
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for i, label in enumerate(labels)}

print(f"\nTotal number of labels: {len(labels)}")
print(f"Labels: {labels}")


Total number of labels: 17
Labels: ['O', 'B-COLLEGE NAME', 'B-COMPANY', 'B-DEGREE', 'B-DESIGNATION', 'B-EMAIL', 'B-LOCATION', 'B-NAME', 'B-SKILLS', 'I-COLLEGE NAME', 'I-COMPANY', 'I-DEGREE', 'I-DESIGNATION', 'I-EMAIL', 'I-LOCATION', 'I-NAME', 'I-SKILLS']


In [None]:


# Improved BIO format conversion
def convert_data_to_bio_format(data):
    """Convert annotated examples to whitespace tokens with one BIO tag each.

    Every example must provide ``example['content']`` (the raw text) and
    ``example['annotation']``: a list of dicts holding a ``'label'`` list and a
    ``'points'`` list whose entries carry ``'start'``/``'end'`` character offsets.

    Args:
        data: Iterable of annotated examples.

    Returns:
        A list of dicts with the keys ``'tokens'`` (whitespace-delimited tokens),
        ``'labels'`` (one BIO tag per token) and ``'original_text'``. An example
        that raises while being processed is reported via ``print`` and skipped.
    """
    processed_data = []

    for example in data:
        try:
            content = example['content']
            annotations = example['annotation']

            # Start with every character tagged as non-entity.
            char_labels = ['O'] * len(content)

            # Flatten the annotations into (start, end, label) spans.
            entity_spans = []
            for annotation in annotations:
                for label in annotation['label']:
                    for point in annotation['points']:
                        if 'start' in point and 'end' in point:
                            start, end = point['start'], point['end']
                            # Reject offsets outside the text. A negative start
                            # would otherwise silently index from the end.
                            if 0 <= start < len(content) and end <= len(content):
                                entity_spans.append((start, end, label))

            # Descending start position (ties: longer span first). Spans are
            # written in this order and later writes overwrite earlier ones, so
            # on overlapping characters the span that starts earlier wins.
            entity_spans.sort(key=lambda x: (x[0], x[1]-x[0]), reverse=True)

            # Apply labels to the character sequence.
            # `end` is treated as exclusive here.
            # NOTE(review): some annotation exports use an inclusive end offset;
            # confirm against the dataset before relying on the last character.
            for start, end, label in entity_spans:
                # Skip leading whitespace so the B- tag lands on a character that
                # belongs to a token. Otherwise a span such as "\n• Programming"
                # puts B- on the newline and its first token is tagged I-.
                first = start
                while first < end and content[first].isspace():
                    first += 1
                if start < end and first == end:
                    # The span covers whitespace only: no token can carry it.
                    continue
                char_labels[first] = f'B-{label}'
                for i in range(first + 1, end):
                    char_labels[i] = f'I-{label}'

            # Tokenize on whitespace, remembering each token's character span.
            # Punctuation stays attached to its neighbouring word.
            tokens = []
            spans = []
            for match in re.finditer(r'\S+', content):
                tokens.append(match.group())
                spans.append(match.span())

            # Derive one tag per token from the tags of its characters:
            # any B- tag wins over I- tags, which win over 'O'.
            token_labels = []
            for start, end in spans:
                span_labels = char_labels[start:end]
                b_labels = [l for l in span_labels if l.startswith('B-')]
                i_labels = [l for l in span_labels if l.startswith('I-')]

                if b_labels:
                    # Most frequent B- tag inside the token.
                    token_labels.append(Counter(b_labels).most_common(1)[0][0])
                elif i_labels:
                    # Most frequent I- tag inside the token.
                    token_labels.append(Counter(i_labels).most_common(1)[0][0])
                else:
                    token_labels.append('O')

            processed_data.append({
                'tokens': tokens,
                'labels': token_labels,
                'original_text': content
            })

        except Exception as e:
            # Best effort: report the broken example and keep going.
            print(f"Error processing example: {str(e)}")
            continue

    return processed_data

# Process all data: convert every loaded record to token-level BIO tags.
# Records that fail inside convert_data_to_bio_format are skipped there, so the
# count printed below can be smaller than len(data).
processed_data = convert_data_to_bio_format(data)
print(f"\nProcessed {len(processed_data)} examples to BIO format")


Processed 430 examples to BIO format


In [None]:

# Show an example of processed data and the overall tag frequencies.
if processed_data:
    example = processed_data[0]
    print("\nExample processed data (first 10 tokens):")
    for token, label in zip(example['tokens'][:10], example['labels'][:10]):
        print(f"{token} -> {label}")

    # Analyze label distribution: flatten the tags of all examples into one list.
    all_processed_labels = []
    for example in processed_data:
        all_processed_labels.extend(example['labels'])

    # Printed from most to least frequent tag.
    label_counts = Counter(all_processed_labels)
    print("\nLabel distribution in processed data:")
    for label, count in label_counts.most_common():
        print(f"{label}: {count}")
else:
    print("No processed data available.")


Example processed data (first 10 tokens):
Abhishek -> B-NAME
Jha -> I-NAME
Application -> B-DESIGNATION
Development -> I-DESIGNATION
Associate -> I-DESIGNATION
- -> O
Accenture -> B-COMPANY
Bengaluru, -> B-LOCATION
Karnataka -> O
- -> O

Label distribution in processed data:
O: 184035
I-SKILLS: 13883
I-COLLEGE NAME: 1698
I-DESIGNATION: 1636
I-DEGREE: 1584
B-COMPANY: 1186
B-DESIGNATION: 928
I-COMPANY: 875
B-SKILLS: 788
B-LOCATION: 783
B-COLLEGE NAME: 612
B-DEGREE: 571
B-EMAIL: 443
B-NAME: 438
I-NAME: 429
I-EMAIL: 186
I-LOCATION: 50


In [None]:
# STEP 1: Add entity presence info.
# Each example gets an 'entity_presence' key: the sorted entity types that occur
# in it, joined with '+', or 'none' when it has no entity tags at all.
for example in processed_data:
    present_entities = set()
    for label in example['labels']:
        if label != 'O':
            # Strip only the leading "B-"/"I-" prefix. maxsplit=1 keeps entity
            # names that themselves contain a hyphen intact.
            entity_type = label.split('-', 1)[1]
            present_entities.add(entity_type)
    example['entity_presence'] = '+'.join(sorted(present_entities)) if present_entities else 'none'

# STEP 2: Count combinations (used later as the stratification key).
entity_presence_counts = Counter(ex['entity_presence'] for ex in processed_data)
print("\nEntity distribution before split:")
for entity_type, count in entity_presence_counts.most_common():
    print(f"{entity_type}: {count}")


Entity distribution before split:
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 193
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME: 42
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+LOCATION+NAME+SKILLS: 30
COLLEGE NAME+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 17
COLLEGE NAME+COMPANY+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 14
COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 14
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+NAME+SKILLS: 13
COLLEGE NAME+COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 11
COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 10
COLLEGE NAME+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 7
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+NAME+SKILLS: 7
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+LOCATION+NAME: 6
DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 6
COLLEGE NAME+DEGREE+DESIGNATION+LOCATION+NAME: 5
COMPANY+DEGREE+DESIGNATION+LOCATION+NAME+SKILLS: 4
COMPANY+DESIGNATION+EMAIL+LOCATION+NAME: 4
COLLEGE NAME+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 4
COLLE

In [None]:
# STEP 3: Replace rare combinations with 'OTHER' to retain all data.
# Uses the counts computed in STEP 2: any combination seen fewer than 2 times is
# relabelled in place on the example dicts.
for example in processed_data:
    if entity_presence_counts[example['entity_presence']] < 2:
        example['entity_presence'] = 'OTHER'

# STEP 4: Final count after merging rare combinations
# (rebinds entity_presence_counts to the post-merge distribution).
entity_presence_counts = Counter(ex['entity_presence'] for ex in processed_data)
print("\nEntity distribution after merging rare combinations:")
for entity_type, count in entity_presence_counts.most_common():
    print(f"{entity_type}: {count}")


Entity distribution after merging rare combinations:
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 193
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME: 42
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+LOCATION+NAME+SKILLS: 30
COLLEGE NAME+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 17
COLLEGE NAME+COMPANY+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 14
COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 14
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+NAME+SKILLS: 13
COLLEGE NAME+COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 11
COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 10
COLLEGE NAME+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 7
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+NAME+SKILLS: 7
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+LOCATION+NAME: 6
DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 6
COLLEGE NAME+DEGREE+DESIGNATION+LOCATION+NAME: 5
COMPANY+DEGREE+DESIGNATION+LOCATION+NAME+SKILLS: 4
COMPANY+DESIGNATION+EMAIL+LOCATION+NAME: 4
COLLEGE NAME+DESIGNATION+EMAIL+LOCATION+N

In [None]:
# STEP 5: Stratified split once (Train vs Temp)
# 70% train / 30% temp, stratified on 'entity_presence'; temp is then halved
# into validation and test.
used_stratified = False

try:
    train_data, temp_data = train_test_split(
        processed_data,
        test_size=0.3,
        random_state=42,
        shuffle=True,
        stratify=[ex['entity_presence'] for ex in processed_data]
    )
    # The flag only reflects this first split.
    used_stratified = True

    # The validation/test split passes no `stratify` argument, i.e. it is random.
    val_data, test_data = train_test_split(
        temp_data,
        test_size=0.5,
        random_state=42,
        shuffle=True
    )

except ValueError as e:
    # NOTE(review): presumably raised when a stratification class is too small
    # to be split — confirm against the scikit-learn documentation.
    print(f"⚠ Stratified split failed: {e}")
    print("❗ Falling back to full random splitting.")

    train_data, temp_data = train_test_split(
        processed_data, test_size=0.3, random_state=42, shuffle=True
    )
    val_data, test_data = train_test_split(
        temp_data, test_size=0.5, random_state=42, shuffle=True
    )

# STEP 6: Final summary
print("\n✅ Data splitting complete:")
print(f"Train: {len(train_data)}")
print(f"Validation: {len(val_data)}")
print(f"Test: {len(test_data)}")
print("Stratified splitting used ✅" if used_stratified else "Random splitting used ⚠️")



✅ Data splitting complete:
Train: 301
Validation: 64
Test: 65
Stratified splitting used ✅


In [None]:
# Data Augmentation Functions
import random
# Cache for the replacement pool, so it is built once per training set instead
# of once per augmented example. 'source' keeps a reference to the list the
# pool was built from, which makes the identity check below reliable.
_NON_ENTITY_POOL_CACHE = {'source': None, 'size': -1, 'pool': None}


def _build_non_entity_pool(examples):
    """Group the 'O'-labelled tokens of ``examples`` by length (capped at 14)."""
    pool = {length: [] for length in range(1, 15)}
    for example in examples:
        for token, label in zip(example['tokens'], example['labels']):
            if label == 'O':
                # Cap at 14 to avoid sparse groups for very long tokens.
                pool.setdefault(min(len(token), 14), []).append(token)
    return pool


def augment_token_replacement(tokens, labels, replacement_prob=0.15, non_entity_pool=None):
    """Replace random non-entity tokens with other non-entity tokens of equal length.

    Args:
        tokens: List of token strings; not modified.
        labels: BIO tags aligned with ``tokens``.
        replacement_prob: Probability of replacing each 'O'-labelled token.
        non_entity_pool: Optional mapping ``length -> list of candidate tokens``
            (lengths capped at 14). When omitted, the pool is built from the
            'O'-labelled tokens of the global ``train_data`` (training data only)
            and cached until ``train_data`` is rebound or changes size.

    Returns:
        ``(augmented_tokens, labels)``: a new token list and the unchanged
        ``labels`` object. Entity tokens are never replaced.
    """
    augmented_tokens = tokens.copy()

    if non_entity_pool is None:
        cache = _NON_ENTITY_POOL_CACHE
        if cache['source'] is not train_data or cache['size'] != len(train_data):
            cache['pool'] = _build_non_entity_pool(train_data)
            cache['source'] = train_data
            cache['size'] = len(train_data)
        non_entity_pool = cache['pool']

    # Replace non-entity tokens with probability replacement_prob.
    for i, (token, label) in enumerate(zip(tokens, labels)):
        if label == 'O' and random.random() < replacement_prob:
            candidates = non_entity_pool.get(min(len(token), 14))
            if candidates:  # Only if we have replacements of this length
                augmented_tokens[i] = random.choice(candidates)

    return augmented_tokens, labels

def augment_token_deletion(tokens, labels, deletion_prob=0.05):
    """Randomly drop non-entity tokens.

    Each 'O'-labelled token is removed with probability ``deletion_prob``;
    tokens carrying any other tag are always kept.

    Returns:
        ``(augmented_tokens, augmented_labels)``: two new, aligned lists.
    """
    # A random number is drawn only for 'O' tokens (short-circuit `and`).
    survivors = [
        (tok, tag)
        for tok, tag in zip(tokens, labels)
        if not (tag == 'O' and random.random() < deletion_prob)
    ]
    augmented_tokens = [tok for tok, _ in survivors]
    augmented_labels = [tag for _, tag in survivors]
    return augmented_tokens, augmented_labels

def augment_token_swap(tokens, labels, swap_prob=0.05):
    """Swap a random subset of adjacent non-entity token pairs.

    A position ``p`` is eligible when both ``labels[p]`` and ``labels[p + 1]``
    are 'O'. ``int(len(eligible) * swap_prob)`` of those positions are sampled
    and the tokens at ``p`` and ``p + 1`` are exchanged.

    Returns:
        ``(augmented_tokens, labels)``: a new token list and the unchanged
        ``labels`` object.
    """
    augmented_tokens = tokens.copy()

    # Left indices of all adjacent 'O'/'O' pairs.
    eligible_positions = [
        idx
        for idx in range(len(labels) - 1)
        if labels[idx] == 'O' and labels[idx + 1] == 'O'
    ]

    num_swaps = int(len(eligible_positions) * swap_prob)
    if not eligible_positions or num_swaps <= 0:
        return augmented_tokens, labels

    sample_size = min(num_swaps, len(eligible_positions))
    for left in random.sample(eligible_positions, sample_size):
        right = left + 1
        augmented_tokens[left], augmented_tokens[right] = augmented_tokens[right], augmented_tokens[left]

    return augmented_tokens, labels

def augment_data(data, augmentation_factor=2):
    """Grow a dataset by appending randomly augmented copies of its examples.

    The result starts with the original examples (same dict objects), followed
    by ``augmentation_factor - 1`` rounds in which every example is augmented
    once with a randomly chosen technique: replacement, deletion, swap, or a
    random combination of the three.

    Returns:
        A new list. Augmented entries carry 'tokens', 'labels', 'original_text',
        'entity_presence' and ``'augmented': True``.
    """
    result = list(data)

    print(f"\nApplying data augmentation with factor {augmentation_factor}...")

    technique_names = ['replace', 'delete', 'swap', 'combined']
    for _round in range(augmentation_factor - 1):
        for source in data:
            src_tokens = source['tokens']
            src_labels = source['labels']

            technique = random.choice(technique_names)

            if technique == 'combined':
                # Each of the three techniques is applied with probability 0.5,
                # in a fixed order and with gentler probabilities.
                new_tokens, new_labels = src_tokens.copy(), src_labels.copy()
                if random.random() < 0.5:
                    new_tokens, new_labels = augment_token_replacement(new_tokens, new_labels, 0.1)
                if random.random() < 0.5:
                    new_tokens, new_labels = augment_token_deletion(new_tokens, new_labels, 0.03)
                if random.random() < 0.5:
                    new_tokens, new_labels = augment_token_swap(new_tokens, new_labels, 0.03)
            elif technique == 'replace':
                new_tokens, new_labels = augment_token_replacement(src_tokens, src_labels)
            elif technique == 'delete':
                new_tokens, new_labels = augment_token_deletion(src_tokens, src_labels)
            else:  # swap
                new_tokens, new_labels = augment_token_swap(src_tokens, src_labels)

            # The augmented copy keeps the metadata of the example it came from.
            result.append({
                'tokens': new_tokens,
                'labels': new_labels,
                'original_text': source['original_text'],
                'entity_presence': source['entity_presence'],
                'augmented': True
            })

    return result

# APPLY AUGMENTATION ONLY TO TRAINING DATA
# Validation and test sets are left untouched so that evaluation runs on
# unmodified examples.
AUGMENTATION_FACTOR = 3  # Original data + 2x augmented data
train_data_augmented = augment_data(train_data, AUGMENTATION_FACTOR)
print(f"Training data augmentation complete. Training set size increased from {len(train_data)} to {len(train_data_augmented)}")



Applying data augmentation with factor 3...
Training data augmentation complete. Training set size increased from 301 to 903


In [None]:

# Count entity presence distribution after augmentation for training data.
# Augmented copies reuse the 'entity_presence' value of the example they were
# derived from, so this reflects the original combinations scaled up.
train_entity_presence_counts = Counter(ex['entity_presence'] for ex in train_data_augmented)
print("\nTraining data entity distribution after augmentation:")
for entity_type, count in train_entity_presence_counts.most_common():
    print(f"{entity_type}: {count}")


Training data entity distribution after augmentation:
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 405
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME: 87
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+LOCATION+NAME+SKILLS: 63
COLLEGE NAME+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 36
COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 30
COLLEGE NAME+COMPANY+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 30
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+NAME+SKILLS: 27
COLLEGE NAME+COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 24
COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 21
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+NAME+SKILLS: 15
COLLEGE NAME+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 15
DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 12
COLLEGE NAME+DEGREE+DESIGNATION+LOCATION+NAME: 12
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+LOCATION+NAME: 12
COMPANY+DEGREE+DESIGNATION+LOCATION+NAME+SKILLS: 9
COLLEGE NAME+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 9
COMPANY+DESIGNATION+EMA

In [None]:

# Check class distribution in splits
def print_entity_distribution(dataset, name):
    """Print how often each 'entity_presence' value occurs in ``dataset``.

    Args:
        dataset: Iterable of example dicts that have an 'entity_presence' key.
        name: Display name of the split, used in the header line.
    """
    tally = Counter(ex['entity_presence'] for ex in dataset)
    # Header first, then one "<combination>: <count>" line per value,
    # ordered from most to least frequent.
    report_lines = [f"\n{name} set entity distribution:"]
    report_lines.extend(f"{combo}: {n}" for combo, n in tally.most_common())
    print("\n".join(report_lines))

# Compare the entity-combination distribution across the three splits.
print_entity_distribution(train_data_augmented, "Training (Augmented)")
print_entity_distribution(val_data, "Validation")
print_entity_distribution(test_data, "Test")


Training (Augmented) set entity distribution:
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 405
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME: 87
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+LOCATION+NAME+SKILLS: 63
COLLEGE NAME+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 36
COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 30
COLLEGE NAME+COMPANY+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 30
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+EMAIL+NAME+SKILLS: 27
COLLEGE NAME+COMPANY+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 24
COMPANY+DEGREE+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 21
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+NAME+SKILLS: 15
COLLEGE NAME+DEGREE+EMAIL+LOCATION+NAME+SKILLS: 15
DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 12
COLLEGE NAME+DEGREE+DESIGNATION+LOCATION+NAME: 12
COLLEGE NAME+COMPANY+DEGREE+DESIGNATION+LOCATION+NAME: 12
COMPANY+DEGREE+DESIGNATION+LOCATION+NAME+SKILLS: 9
COLLEGE NAME+DESIGNATION+EMAIL+LOCATION+NAME+SKILLS: 9
COMPANY+DESIGNATION+EMAIL+LOCAT

In [None]:
# Fixed and improved ResumeNERDataset class for BERT
class ResumeNERDataset(Dataset):
    """Dataset that aligns word-level BIO tags with tokenizer sub-tokens.

    Args:
        texts: List of examples, each a list of word strings.
        tags: List of BIO tag lists aligned with ``texts``.
        tokenizer: Callable tokenizer whose result offers ``word_ids()`` and
            ``items()`` (a Hugging Face *fast* tokenizer in this notebook).
        max_len: Fixed sequence length. Inputs are padded to it and truncated
            beyond it, so words past that limit are not seen by the model.
        label2id: Mapping from tag string to class id; must contain 'O'.
    """

    def __init__(self, texts, tags, tokenizer, max_len, label2id):
        self.texts = texts
        self.tags = tags
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.label2id = label2id

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, item_idx):
        """Return the encoded example plus a ``'labels'`` tensor of length ``max_len``.

        Labelling rules per sub-token:
          * special and padding tokens (no word id) get -100;
          * the first sub-token of a word gets the word's tag;
          * further sub-tokens of the same word get the word's tag, with a
            ``B-`` prefix turned into ``I-``;
          * tags missing from ``label2id`` fall back to 'O'.
        """
        text = self.texts[item_idx]
        tags = self.tags[item_idx]

        # Ensure text and tags have the same length.
        if len(text) != len(tags):
            min_len = min(len(text), len(tags))
            text = text[:min_len]
            tags = tags[:min_len]

        # Offset mappings were requested and then discarded before; they are
        # not needed because alignment is done through word_ids().
        encoding = self.tokenizer(
            text,
            is_split_into_words=True,
            padding='max_length',
            truncation=True,
            max_length=self.max_len,
            return_tensors='pt'
        )

        word_ids = encoding.word_ids()
        fallback_id = self.label2id['O']
        previous_word_idx = None
        label_ids = []

        for word_idx in word_ids:
            if word_idx is None or word_idx >= len(tags):
                # Special/padding token, or a word without a tag: ignored by the loss.
                label_ids.append(-100)
            else:
                tag = tags[word_idx]
                if word_idx == previous_word_idx and tag.startswith('B-'):
                    # Continuation sub-token of a word that begins an entity.
                    tag = f"I-{tag[2:]}"
                label_ids.append(self.label2id.get(tag, fallback_id))
            previous_word_idx = word_idx

        # Drop only the leading batch dimension added by return_tensors='pt'.
        # A bare squeeze() would also collapse the sequence axis when max_len == 1.
        item = {key: val.squeeze(0) for key, val in encoding.items() if key != 'offset_mapping'}
        item['labels'] = torch.tensor(label_ids, dtype=torch.long)

        return item


In [None]:

from transformers import BertTokenizerFast, BertForTokenClassification
# Prepare datasets with adequate max length using BERT
# ResumeNERDataset pads every example to MAX_LEN and truncates beyond it, so
# anything past 512 sub-tokens of a resume is not seen by the model.
MAX_LEN = 512
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# Use augmented training data, but original validation and test data
train_texts = [example['tokens'] for example in train_data_augmented]
train_tags = [example['labels'] for example in train_data_augmented]
val_texts = [example['tokens'] for example in val_data]
val_tags = [example['labels'] for example in val_data]
test_texts = [example['tokens'] for example in test_data]
test_tags = [example['labels'] for example in test_data]




# One dataset per split, all sharing the same tokenizer, length and label map.
train_dataset = ResumeNERDataset(train_texts, train_tags, tokenizer, MAX_LEN, label2id)
val_dataset = ResumeNERDataset(val_texts, val_tags, tokenizer, MAX_LEN, label2id)
test_dataset = ResumeNERDataset(test_texts, test_tags, tokenizer, MAX_LEN, label2id)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:

# Create data loaders with appropriate batch size for your data
BATCH_SIZE = 8  # Reduced to allow more gradient updates and better learning
# Only the training loader shuffles; validation and test keep dataset order.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [None]:

# Initialize BERT model
# One output class per BIO tag; the id/label maps are passed so they are stored
# with the model configuration.
model = BertForTokenClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id
)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Setup device: use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"\nUsing device: {device}")
# As the last expression of the cell, this also displays the model structure.
model.to(device)


Using device: cuda


BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12

In [None]:
# Improved training parameters (read by the optimizer and scheduler cells below)
LEARNING_RATE = 3e-5  # Slightly higher learning rate
EPOCHS = 30  # More epochs for better learning
WEIGHT_DECAY = 0.01  # Add weight decay to reduce overfitting

In [None]:
# Configure optimizer with weight decay
# The same weight decay is applied to every model parameter (no parameter groups).
optimizer = AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY
)

In [None]:
# Setup scheduler with warmup.
# train() calls optimizer.step() / scheduler.step() once per gradient-accumulation
# group (and once more for a trailing partial group), not once per batch. The
# schedule length therefore has to be counted in optimizer updates; counting
# batches stretches the warmup and never lets the learning rate decay to zero.
ACCUMULATION_STEPS = 4  # keep in sync with the `accumulation_steps` used by train()
updates_per_epoch = (len(train_dataloader) + ACCUMULATION_STEPS - 1) // ACCUMULATION_STEPS  # ceil division
total_steps = updates_per_epoch * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.1 * total_steps),  # 10% warmup
    num_training_steps=total_steps
)

In [None]:
# Enhanced training function with gradient accumulation
def train(accumulation_steps=4):
    """Run one training epoch over ``train_dataloader`` with gradient accumulation.

    Uses the notebook-level ``model``, ``optimizer``, ``scheduler``, ``device``
    and ``train_dataloader``. Gradients are accumulated over
    ``accumulation_steps`` batches before each clipped optimizer/scheduler step;
    a trailing partial group is flushed on the last batch.

    Returns:
        The mean (un-scaled) batch loss of the epoch.
    """
    model.train()
    running_loss = 0

    batches = tqdm(train_dataloader, desc="Training")
    optimizer.zero_grad()

    for step, batch in enumerate(batches, start=1):
        outputs = model(
            input_ids=batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
            labels=batch['labels'].to(device)
        )

        # Scale the loss so that the accumulated gradient is an average.
        scaled_loss = outputs.loss / accumulation_steps
        scaled_loss.backward()

        # Undo the scaling for reporting.
        running_loss += scaled_loss.item() * accumulation_steps

        group_complete = step % accumulation_steps == 0
        if group_complete or step == len(train_dataloader):
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        batches.set_postfix({'loss': f"{scaled_loss.item() * accumulation_steps:.4f}"})

    return running_loss / len(train_dataloader)

In [None]:
# Evaluation with loss, seqeval entity-level metrics and token-level accuracy
def evaluate(dataloader):
    """Evaluate the notebook-level ``model`` on ``dataloader``.

    Positions whose label is -100 are ignored. For every remaining position
    the predicted and true label ids are mapped through ``id2label``; the
    per-sequence label lists are scored with seqeval, and a token-level
    accuracy is computed alongside. Per-entity metrics and the token accuracy
    are printed.

    Args:
        dataloader: Iterable of batches providing ``'input_ids'``,
            ``'attention_mask'`` and ``'labels'`` tensors.

    Returns:
        A tuple ``(mean_loss, report, precision, recall, f1, token_accuracy)``
        where ``report`` is seqeval's ``classification_report`` dictionary.
    """
    model.eval()
    total_loss = 0
    predictions = []
    true_labels = []

    # Variables to track token-level accuracy
    correct_tokens = 0
    total_tokens = 0

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )

            total_loss += outputs.loss.item()

            preds = torch.argmax(outputs.logits, dim=2)

            # Move the whole batch to plain Python lists once, instead of
            # indexing device tensors and calling .item() for every token.
            pred_rows = preds.cpu().tolist()
            label_rows = labels.cpu().tolist()

            for pred_row, label_row in zip(pred_rows, label_rows):
                pred_list = []
                true_list = []
                for pred_id, label_id in zip(pred_row, label_row):
                    if label_id == -100:  # ignored index
                        continue
                    pred_list.append(id2label[pred_id])
                    true_list.append(id2label[label_id])

                    # Count correct token predictions for accuracy
                    if pred_id == label_id:
                        correct_tokens += 1
                    total_tokens += 1

                predictions.append(pred_list)
                true_labels.append(true_list)

    # Calculate token-level accuracy
    token_accuracy = correct_tokens / total_tokens if total_tokens > 0 else 0

    # Calculate metrics using seqeval
    report = classification_report(true_labels, predictions, output_dict=True)
    precision = precision_score(true_labels, predictions)
    recall = recall_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions)

    # Print token-level accuracy
    print(f"\nToken-level accuracy: {token_accuracy:.4f}")

    # Print entity-level metrics, skipping the aggregate rows
    print("\nEntity-level metrics:")
    for entity_type, metrics in report.items():
        if entity_type not in ['micro avg', 'macro avg', 'weighted avg', 'O']:
            print(f"{entity_type}:")
            print(f"  Precision: {metrics['precision']:.4f}")
            print(f"  Recall: {metrics['recall']:.4f}")
            print(f"  F1: {metrics['f1-score']:.4f}")
            print(f"  Support: {metrics['support']}")

    return total_loss / len(dataloader), report, precision, recall, f1, token_accuracy

In [None]:
# Per-epoch history plus best-checkpoint and early-stopping bookkeeping for the epoch loop.
train_losses, val_losses, val_accuracies = [], [], []
best_val_f1 = 0                               # highest validation F1 seen so far
best_model_path = "best_resume_ner_model.pt"  # where the best state_dict is written
patience = 4                                  # epochs without a new best F1 before stopping
no_improvement = 0                            # consecutive epochs without a new best F1

In [None]:
# Epoch loop: train, validate, checkpoint on a new best F1, stop after `patience` stale epochs.
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch + 1}/{EPOCHS}")

    # One pass over the training data
    train_loss = train()
    train_losses.append(train_loss)

    # Validation metrics for this epoch, recorded for the later plots
    val_loss, val_report, val_precision, val_recall, val_f1, val_accuracy = evaluate(val_dataloader)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)

    print(f"Train Loss: {train_loss:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}, Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, F1: {val_f1:.4f}")

    if val_f1 > best_val_f1:
        # Strictly better F1: persist the weights and reset the stale-epoch counter
        best_val_f1 = val_f1
        torch.save(model.state_dict(), best_model_path)
        print(f"New best model saved with F1: {val_f1:.4f}")
        no_improvement = 0
        continue

    # No new best this epoch
    no_improvement += 1
    print(f"No improvement for {no_improvement} epochs")

    if no_improvement >= patience:
        print("Early stopping triggered")
        break



Epoch 1/30


Training: 100%|██████████| 113/113 [01:28<00:00,  1.27it/s, loss=2.3918]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.61it/s]



Token-level accuracy: 0.5452

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 55
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 149
DEGREE:
  Precision: 0.0005
  Recall: 0.0208
  F1: 0.0010
  Support: 48
DESIGNATION:
  Precision: 0.0026
  Recall: 0.0140
  F1: 0.0044
  Support: 143
EMAIL:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 70
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 98
NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 64
SKILLS:
  Precision: 0.0007
  Recall: 0.0132
  F1: 0.0014
  Support: 76
Train Loss: 2.8257
Val Loss: 2.3670, Accuracy: 0.5452, Precision: 0.0005, Recall: 0.0057, F1: 0.0009
New best model saved with F1: 0.0009

Epoch 2/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=1.3221]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.77it/s]



Token-level accuracy: 0.7787

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 55
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 149
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 48
DESIGNATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 143
EMAIL:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 70
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 98
NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 76
Train Loss: 1.4823
Val Loss: 1.0224, Accuracy: 0.7787, Precision: 0.0000, Recall: 0.0000, F1: 0.0000
No improvement for 1 epochs

Epoch 3/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.6261]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.74it/s]



Token-level accuracy: 0.8181

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 55
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 149
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 48
DESIGNATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 143
EMAIL:
  Precision: 0.0251
  Recall: 0.0714
  F1: 0.0372
  Support: 70
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 98
NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 64
SKILLS:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 76
Train Loss: 0.8035
Val Loss: 0.6932, Accuracy: 0.8181, Precision: 0.0200, Recall: 0.0071, F1: 0.0105
New best model saved with F1: 0.0105

Epoch 4/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.6650]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.77it/s]



Token-level accuracy: 0.8516

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 55
COMPANY:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 149
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 48
DESIGNATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 143
EMAIL:
  Precision: 0.1553
  Recall: 0.4857
  F1: 0.2353
  Support: 70
LOCATION:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 98
NAME:
  Precision: 0.1892
  Recall: 0.2188
  F1: 0.2029
  Support: 64
SKILLS:
  Precision: 0.0038
  Recall: 0.0132
  F1: 0.0059
  Support: 76
Train Loss: 0.5733
Val Loss: 0.5092, Accuracy: 0.8516, Precision: 0.0831, Recall: 0.0697, F1: 0.0758
New best model saved with F1: 0.0758

Epoch 5/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.3433]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.62it/s]



Token-level accuracy: 0.8821

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.0088
  Recall: 0.0182
  F1: 0.0118
  Support: 55
COMPANY:
  Precision: 0.0833
  Recall: 0.0134
  F1: 0.0231
  Support: 149
DEGREE:
  Precision: 0.0000
  Recall: 0.0000
  F1: 0.0000
  Support: 48
DESIGNATION:
  Precision: 0.2523
  Recall: 0.1888
  F1: 0.2160
  Support: 143
EMAIL:
  Precision: 0.3607
  Recall: 0.6286
  F1: 0.4583
  Support: 70
LOCATION:
  Precision: 0.0364
  Recall: 0.0204
  F1: 0.0261
  Support: 98
NAME:
  Precision: 0.5890
  Recall: 0.6719
  F1: 0.6277
  Support: 64
SKILLS:
  Precision: 0.0131
  Recall: 0.0395
  F1: 0.0197
  Support: 76
Train Loss: 0.4192
Val Loss: 0.3865, Accuracy: 0.8821, Precision: 0.1642, Recall: 0.1735, F1: 0.1687
New best model saved with F1: 0.1687

Epoch 6/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.2955]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.72it/s]



Token-level accuracy: 0.9000

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.1340
  Recall: 0.2364
  F1: 0.1711
  Support: 55
COMPANY:
  Precision: 0.1698
  Recall: 0.0604
  F1: 0.0891
  Support: 149
DEGREE:
  Precision: 0.2444
  Recall: 0.2292
  F1: 0.2366
  Support: 48
DESIGNATION:
  Precision: 0.2393
  Recall: 0.2727
  F1: 0.2549
  Support: 143
EMAIL:
  Precision: 0.3955
  Recall: 0.7571
  F1: 0.5196
  Support: 70
LOCATION:
  Precision: 0.3465
  Recall: 0.3571
  F1: 0.3518
  Support: 98
NAME:
  Precision: 0.6974
  Recall: 0.8281
  F1: 0.7571
  Support: 64
SKILLS:
  Precision: 0.0282
  Recall: 0.0526
  F1: 0.0367
  Support: 76
Train Loss: 0.3233
Val Loss: 0.3304, Accuracy: 0.9000, Precision: 0.2676, Recall: 0.3087, F1: 0.2867
New best model saved with F1: 0.2867

Epoch 7/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.1441]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.76it/s]



Token-level accuracy: 0.9278

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.2821
  Recall: 0.4000
  F1: 0.3308
  Support: 55
COMPANY:
  Precision: 0.3250
  Recall: 0.3490
  F1: 0.3366
  Support: 149
DEGREE:
  Precision: 0.5455
  Recall: 0.5000
  F1: 0.5217
  Support: 48
DESIGNATION:
  Precision: 0.2872
  Recall: 0.3916
  F1: 0.3314
  Support: 143
EMAIL:
  Precision: 0.6180
  Recall: 0.7857
  F1: 0.6918
  Support: 70
LOCATION:
  Precision: 0.4444
  Recall: 0.5714
  F1: 0.5000
  Support: 98
NAME:
  Precision: 0.8028
  Recall: 0.8906
  F1: 0.8444
  Support: 64
SKILLS:
  Precision: 0.0917
  Recall: 0.1316
  F1: 0.1081
  Support: 76
Train Loss: 0.2559
Val Loss: 0.2537, Accuracy: 0.9278, Precision: 0.3807, Recall: 0.4723, F1: 0.4216
New best model saved with F1: 0.4216

Epoch 8/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.1430]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.69it/s]



Token-level accuracy: 0.9235

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.3462
  Recall: 0.4909
  F1: 0.4060
  Support: 55
COMPANY:
  Precision: 0.3626
  Recall: 0.4430
  F1: 0.3988
  Support: 149
DEGREE:
  Precision: 0.5476
  Recall: 0.4792
  F1: 0.5111
  Support: 48
DESIGNATION:
  Precision: 0.4259
  Recall: 0.4825
  F1: 0.4525
  Support: 143
EMAIL:
  Precision: 0.4231
  Recall: 0.7857
  F1: 0.5500
  Support: 70
LOCATION:
  Precision: 0.5333
  Recall: 0.6531
  F1: 0.5872
  Support: 98
NAME:
  Precision: 0.8696
  Recall: 0.9375
  F1: 0.9023
  Support: 64
SKILLS:
  Precision: 0.0859
  Recall: 0.1447
  F1: 0.1078
  Support: 76
Train Loss: 0.1952
Val Loss: 0.2545, Accuracy: 0.9235, Precision: 0.4116, Recall: 0.5334, F1: 0.4647
New best model saved with F1: 0.4647

Epoch 9/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.1042]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.61it/s]



Token-level accuracy: 0.9428

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.4242
  Recall: 0.5091
  F1: 0.4628
  Support: 55
COMPANY:
  Precision: 0.4110
  Recall: 0.6040
  F1: 0.4891
  Support: 149
DEGREE:
  Precision: 0.6000
  Recall: 0.5000
  F1: 0.5455
  Support: 48
DESIGNATION:
  Precision: 0.4033
  Recall: 0.5105
  F1: 0.4506
  Support: 143
EMAIL:
  Precision: 0.8056
  Recall: 0.8286
  F1: 0.8169
  Support: 70
LOCATION:
  Precision: 0.6111
  Recall: 0.7857
  F1: 0.6875
  Support: 98
NAME:
  Precision: 0.8939
  Recall: 0.9219
  F1: 0.9077
  Support: 64
SKILLS:
  Precision: 0.2037
  Recall: 0.1447
  F1: 0.1692
  Support: 76
Train Loss: 0.1527
Val Loss: 0.2226, Accuracy: 0.9428, Precision: 0.5097, Recall: 0.5974, F1: 0.5501
New best model saved with F1: 0.5501

Epoch 10/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.1552]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.71it/s]



Token-level accuracy: 0.9485

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.4627
  Recall: 0.5636
  F1: 0.5082
  Support: 55
COMPANY:
  Precision: 0.4427
  Recall: 0.5705
  F1: 0.4985
  Support: 149
DEGREE:
  Precision: 0.6512
  Recall: 0.5833
  F1: 0.6154
  Support: 48
DESIGNATION:
  Precision: 0.4793
  Recall: 0.5664
  F1: 0.5192
  Support: 143
EMAIL:
  Precision: 0.5357
  Recall: 0.8571
  F1: 0.6593
  Support: 70
LOCATION:
  Precision: 0.6875
  Recall: 0.7857
  F1: 0.7333
  Support: 98
NAME:
  Precision: 0.9077
  Recall: 0.9219
  F1: 0.9147
  Support: 64
SKILLS:
  Precision: 0.2821
  Recall: 0.2895
  F1: 0.2857
  Support: 76
Train Loss: 0.1174
Val Loss: 0.1851, Accuracy: 0.9485, Precision: 0.5286, Recall: 0.6302, F1: 0.5750
New best model saved with F1: 0.5750

Epoch 11/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0618]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.75it/s]



Token-level accuracy: 0.9561

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5303
  Recall: 0.6364
  F1: 0.5785
  Support: 55
COMPANY:
  Precision: 0.5723
  Recall: 0.6644
  F1: 0.6149
  Support: 149
DEGREE:
  Precision: 0.6889
  Recall: 0.6458
  F1: 0.6667
  Support: 48
DESIGNATION:
  Precision: 0.5301
  Recall: 0.6154
  F1: 0.5696
  Support: 143
EMAIL:
  Precision: 0.7407
  Recall: 0.8571
  F1: 0.7947
  Support: 70
LOCATION:
  Precision: 0.7521
  Recall: 0.8980
  F1: 0.8186
  Support: 98
NAME:
  Precision: 0.9692
  Recall: 0.9844
  F1: 0.9767
  Support: 64
SKILLS:
  Precision: 0.3636
  Recall: 0.3158
  F1: 0.3380
  Support: 76
Train Loss: 0.0946
Val Loss: 0.1730, Accuracy: 0.9561, Precision: 0.6264, Recall: 0.6942, F1: 0.6586
New best model saved with F1: 0.6586

Epoch 12/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0398]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.69it/s]



Token-level accuracy: 0.9546

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.5909
  Recall: 0.7091
  F1: 0.6446
  Support: 55
COMPANY:
  Precision: 0.5556
  Recall: 0.7383
  F1: 0.6340
  Support: 149
DEGREE:
  Precision: 0.6889
  Recall: 0.6458
  F1: 0.6667
  Support: 48
DESIGNATION:
  Precision: 0.5793
  Recall: 0.6643
  F1: 0.6189
  Support: 143
EMAIL:
  Precision: 0.7792
  Recall: 0.8571
  F1: 0.8163
  Support: 70
LOCATION:
  Precision: 0.7258
  Recall: 0.9184
  F1: 0.8108
  Support: 98
NAME:
  Precision: 0.9692
  Recall: 0.9844
  F1: 0.9767
  Support: 64
SKILLS:
  Precision: 0.3810
  Recall: 0.3158
  F1: 0.3453
  Support: 76
Train Loss: 0.0748
Val Loss: 0.2023, Accuracy: 0.9546, Precision: 0.6384, Recall: 0.7283, F1: 0.6804
New best model saved with F1: 0.6804

Epoch 13/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0309]
Evaluating: 100%|██████████| 8/8 [00:05<00:00,  1.60it/s]



Token-level accuracy: 0.9595

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6406
  Recall: 0.7455
  F1: 0.6891
  Support: 55
COMPANY:
  Precision: 0.6548
  Recall: 0.7383
  F1: 0.6940
  Support: 149
DEGREE:
  Precision: 0.6863
  Recall: 0.7292
  F1: 0.7071
  Support: 48
DESIGNATION:
  Precision: 0.6226
  Recall: 0.6923
  F1: 0.6556
  Support: 143
EMAIL:
  Precision: 0.5755
  Recall: 0.8714
  F1: 0.6932
  Support: 70
LOCATION:
  Precision: 0.8214
  Recall: 0.9388
  F1: 0.8762
  Support: 98
NAME:
  Precision: 0.9545
  Recall: 0.9844
  F1: 0.9692
  Support: 64
SKILLS:
  Precision: 0.3649
  Recall: 0.3553
  F1: 0.3600
  Support: 76
Train Loss: 0.0588
Val Loss: 0.1837, Accuracy: 0.9595, Precision: 0.6600, Recall: 0.7511, F1: 0.7026
New best model saved with F1: 0.7026

Epoch 14/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0503]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.71it/s]



Token-level accuracy: 0.9633

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6774
  Recall: 0.7636
  F1: 0.7179
  Support: 55
COMPANY:
  Precision: 0.6528
  Recall: 0.6309
  F1: 0.6416
  Support: 149
DEGREE:
  Precision: 0.7255
  Recall: 0.7708
  F1: 0.7475
  Support: 48
DESIGNATION:
  Precision: 0.6972
  Recall: 0.6923
  F1: 0.6947
  Support: 143
EMAIL:
  Precision: 0.7692
  Recall: 0.8571
  F1: 0.8108
  Support: 70
LOCATION:
  Precision: 0.8835
  Recall: 0.9286
  F1: 0.9055
  Support: 98
NAME:
  Precision: 0.9692
  Recall: 0.9844
  F1: 0.9767
  Support: 64
SKILLS:
  Precision: 0.3500
  Recall: 0.3684
  F1: 0.3590
  Support: 76
Train Loss: 0.0528
Val Loss: 0.1629, Accuracy: 0.9633, Precision: 0.7090, Recall: 0.7312, F1: 0.7199
New best model saved with F1: 0.7199

Epoch 15/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0314]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.76it/s]



Token-level accuracy: 0.9581

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.6875
  Recall: 0.8000
  F1: 0.7395
  Support: 55
COMPANY:
  Precision: 0.6453
  Recall: 0.7450
  F1: 0.6916
  Support: 149
DEGREE:
  Precision: 0.6667
  Recall: 0.7917
  F1: 0.7238
  Support: 48
DESIGNATION:
  Precision: 0.6319
  Recall: 0.7203
  F1: 0.6732
  Support: 143
EMAIL:
  Precision: 0.6421
  Recall: 0.8714
  F1: 0.7394
  Support: 70
LOCATION:
  Precision: 0.7917
  Recall: 0.9694
  F1: 0.8716
  Support: 98
NAME:
  Precision: 0.9692
  Recall: 0.9844
  F1: 0.9767
  Support: 64
SKILLS:
  Precision: 0.5915
  Recall: 0.5526
  F1: 0.5714
  Support: 76
Train Loss: 0.0430
Val Loss: 0.2049, Accuracy: 0.9581, Precision: 0.6902, Recall: 0.7923, F1: 0.7377
New best model saved with F1: 0.7377

Epoch 16/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0097]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.70it/s]



Token-level accuracy: 0.9629

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7031
  Recall: 0.8182
  F1: 0.7563
  Support: 55
COMPANY:
  Precision: 0.6280
  Recall: 0.6913
  F1: 0.6581
  Support: 149
DEGREE:
  Precision: 0.6667
  Recall: 0.7917
  F1: 0.7238
  Support: 48
DESIGNATION:
  Precision: 0.7203
  Recall: 0.7203
  F1: 0.7203
  Support: 143
EMAIL:
  Precision: 0.7750
  Recall: 0.8857
  F1: 0.8267
  Support: 70
LOCATION:
  Precision: 0.9038
  Recall: 0.9592
  F1: 0.9307
  Support: 98
NAME:
  Precision: 0.9692
  Recall: 0.9844
  F1: 0.9767
  Support: 64
SKILLS:
  Precision: 0.4940
  Recall: 0.5395
  F1: 0.5157
  Support: 76
Train Loss: 0.0325
Val Loss: 0.1814, Accuracy: 0.9629, Precision: 0.7224, Recall: 0.7809, F1: 0.7505
New best model saved with F1: 0.7505

Epoch 17/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0224]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.61it/s]



Token-level accuracy: 0.9648

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7541
  Recall: 0.8364
  F1: 0.7931
  Support: 55
COMPANY:
  Precision: 0.6883
  Recall: 0.7114
  F1: 0.6997
  Support: 149
DEGREE:
  Precision: 0.6552
  Recall: 0.7917
  F1: 0.7170
  Support: 48
DESIGNATION:
  Precision: 0.7415
  Recall: 0.7622
  F1: 0.7517
  Support: 143
EMAIL:
  Precision: 0.8026
  Recall: 0.8714
  F1: 0.8356
  Support: 70
LOCATION:
  Precision: 0.8868
  Recall: 0.9592
  F1: 0.9216
  Support: 98
NAME:
  Precision: 0.9265
  Recall: 0.9844
  F1: 0.9545
  Support: 64
SKILLS:
  Precision: 0.5068
  Recall: 0.4868
  F1: 0.4966
  Support: 76
Train Loss: 0.0290
Val Loss: 0.1836, Accuracy: 0.9648, Precision: 0.7456, Recall: 0.7881, F1: 0.7663
New best model saved with F1: 0.7663

Epoch 18/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0308]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.71it/s]



Token-level accuracy: 0.9639

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7031
  Recall: 0.8182
  F1: 0.7563
  Support: 55
COMPANY:
  Precision: 0.6626
  Recall: 0.7248
  F1: 0.6923
  Support: 149
DEGREE:
  Precision: 0.6786
  Recall: 0.7917
  F1: 0.7308
  Support: 48
DESIGNATION:
  Precision: 0.7285
  Recall: 0.7692
  F1: 0.7483
  Support: 143
EMAIL:
  Precision: 0.7294
  Recall: 0.8857
  F1: 0.8000
  Support: 70
LOCATION:
  Precision: 0.8624
  Recall: 0.9592
  F1: 0.9082
  Support: 98
NAME:
  Precision: 0.9545
  Recall: 0.9844
  F1: 0.9692
  Support: 64
SKILLS:
  Precision: 0.6479
  Recall: 0.6053
  F1: 0.6259
  Support: 76
Train Loss: 0.0225
Val Loss: 0.1965, Accuracy: 0.9639, Precision: 0.7399, Recall: 0.8051, F1: 0.7711
New best model saved with F1: 0.7711

Epoch 19/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0192]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.75it/s]



Token-level accuracy: 0.9635

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7188
  Recall: 0.8364
  F1: 0.7731
  Support: 55
COMPANY:
  Precision: 0.6485
  Recall: 0.7181
  F1: 0.6815
  Support: 149
DEGREE:
  Precision: 0.6724
  Recall: 0.8125
  F1: 0.7358
  Support: 48
DESIGNATION:
  Precision: 0.7208
  Recall: 0.7762
  F1: 0.7475
  Support: 143
EMAIL:
  Precision: 0.8333
  Recall: 0.8571
  F1: 0.8451
  Support: 70
LOCATION:
  Precision: 0.8545
  Recall: 0.9592
  F1: 0.9038
  Support: 98
NAME:
  Precision: 0.9545
  Recall: 0.9844
  F1: 0.9692
  Support: 64
SKILLS:
  Precision: 0.5595
  Recall: 0.6184
  F1: 0.5875
  Support: 76
Train Loss: 0.0215
Val Loss: 0.1978, Accuracy: 0.9635, Precision: 0.7335, Recall: 0.8065, F1: 0.7683
No improvement for 1 epochs

Epoch 20/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0256]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.75it/s]



Token-level accuracy: 0.9639

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7667
  Recall: 0.8364
  F1: 0.8000
  Support: 55
COMPANY:
  Precision: 0.6810
  Recall: 0.7450
  F1: 0.7115
  Support: 149
DEGREE:
  Precision: 0.7193
  Recall: 0.8542
  F1: 0.7810
  Support: 48
DESIGNATION:
  Precision: 0.7237
  Recall: 0.7692
  F1: 0.7458
  Support: 143
EMAIL:
  Precision: 0.8971
  Recall: 0.8714
  F1: 0.8841
  Support: 70
LOCATION:
  Precision: 0.8611
  Recall: 0.9490
  F1: 0.9029
  Support: 98
NAME:
  Precision: 0.9692
  Recall: 0.9844
  F1: 0.9767
  Support: 64
SKILLS:
  Precision: 0.5952
  Recall: 0.6579
  F1: 0.6250
  Support: 76
Train Loss: 0.0176
Val Loss: 0.1980, Accuracy: 0.9639, Precision: 0.7596, Recall: 0.8179, F1: 0.7877
New best model saved with F1: 0.7877

Epoch 21/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0116]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.76it/s]



Token-level accuracy: 0.9672

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7797
  Recall: 0.8364
  F1: 0.8070
  Support: 55
COMPANY:
  Precision: 0.7134
  Recall: 0.7517
  F1: 0.7320
  Support: 149
DEGREE:
  Precision: 0.6949
  Recall: 0.8542
  F1: 0.7664
  Support: 48
DESIGNATION:
  Precision: 0.7303
  Recall: 0.7762
  F1: 0.7525
  Support: 143
EMAIL:
  Precision: 0.8889
  Recall: 0.9143
  F1: 0.9014
  Support: 70
LOCATION:
  Precision: 0.8774
  Recall: 0.9490
  F1: 0.9118
  Support: 98
NAME:
  Precision: 0.9692
  Recall: 0.9844
  F1: 0.9767
  Support: 64
SKILLS:
  Precision: 0.6625
  Recall: 0.6974
  F1: 0.6795
  Support: 76
Train Loss: 0.0135
Val Loss: 0.2078, Accuracy: 0.9672, Precision: 0.7773, Recall: 0.8293, F1: 0.8025
New best model saved with F1: 0.8025

Epoch 22/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0088]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.65it/s]



Token-level accuracy: 0.9642

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7541
  Recall: 0.8364
  F1: 0.7931
  Support: 55
COMPANY:
  Precision: 0.7019
  Recall: 0.7584
  F1: 0.7290
  Support: 149
DEGREE:
  Precision: 0.7455
  Recall: 0.8542
  F1: 0.7961
  Support: 48
DESIGNATION:
  Precision: 0.7063
  Recall: 0.7902
  F1: 0.7459
  Support: 143
EMAIL:
  Precision: 0.9403
  Recall: 0.9000
  F1: 0.9197
  Support: 70
LOCATION:
  Precision: 0.8636
  Recall: 0.9694
  F1: 0.9135
  Support: 98
NAME:
  Precision: 0.9692
  Recall: 0.9844
  F1: 0.9767
  Support: 64
SKILLS:
  Precision: 0.6923
  Recall: 0.7105
  F1: 0.7013
  Support: 76
Train Loss: 0.0130
Val Loss: 0.2148, Accuracy: 0.9642, Precision: 0.7768, Recall: 0.8364, F1: 0.8055
New best model saved with F1: 0.8055

Epoch 23/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0072]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.69it/s]



Token-level accuracy: 0.9663

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7419
  Recall: 0.8364
  F1: 0.7863
  Support: 55
COMPANY:
  Precision: 0.6933
  Recall: 0.7584
  F1: 0.7244
  Support: 149
DEGREE:
  Precision: 0.7407
  Recall: 0.8333
  F1: 0.7843
  Support: 48
DESIGNATION:
  Precision: 0.7219
  Recall: 0.7622
  F1: 0.7415
  Support: 143
EMAIL:
  Precision: 0.8108
  Recall: 0.8571
  F1: 0.8333
  Support: 70
LOCATION:
  Precision: 0.8636
  Recall: 0.9694
  F1: 0.9135
  Support: 98
NAME:
  Precision: 0.9692
  Recall: 0.9844
  F1: 0.9767
  Support: 64
SKILLS:
  Precision: 0.7105
  Recall: 0.7105
  F1: 0.7105
  Support: 76
Train Loss: 0.0114
Val Loss: 0.1907, Accuracy: 0.9663, Precision: 0.7682, Recall: 0.8250, F1: 0.7956
No improvement for 1 epochs

Epoch 24/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0173]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.61it/s]



Token-level accuracy: 0.9615

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7188
  Recall: 0.8364
  F1: 0.7731
  Support: 55
COMPANY:
  Precision: 0.7169
  Recall: 0.7987
  F1: 0.7556
  Support: 149
DEGREE:
  Precision: 0.7273
  Recall: 0.8333
  F1: 0.7767
  Support: 48
DESIGNATION:
  Precision: 0.7160
  Recall: 0.8112
  F1: 0.7607
  Support: 143
EMAIL:
  Precision: 0.7683
  Recall: 0.9000
  F1: 0.8289
  Support: 70
LOCATION:
  Precision: 0.8482
  Recall: 0.9694
  F1: 0.9048
  Support: 98
NAME:
  Precision: 0.9545
  Recall: 0.9844
  F1: 0.9692
  Support: 64
SKILLS:
  Precision: 0.7237
  Recall: 0.7237
  F1: 0.7237
  Support: 76
Train Loss: 0.0114
Val Loss: 0.2225, Accuracy: 0.9615, Precision: 0.7625, Recall: 0.8492, F1: 0.8035
No improvement for 2 epochs

Epoch 25/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0136]
Evaluating: 100%|██████████| 8/8 [00:05<00:00,  1.60it/s]



Token-level accuracy: 0.9672

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7541
  Recall: 0.8364
  F1: 0.7931
  Support: 55
COMPANY:
  Precision: 0.7124
  Recall: 0.7315
  F1: 0.7219
  Support: 149
DEGREE:
  Precision: 0.7778
  Recall: 0.8750
  F1: 0.8235
  Support: 48
DESIGNATION:
  Precision: 0.7517
  Recall: 0.7622
  F1: 0.7569
  Support: 143
EMAIL:
  Precision: 0.8873
  Recall: 0.9000
  F1: 0.8936
  Support: 70
LOCATION:
  Precision: 0.8704
  Recall: 0.9592
  F1: 0.9126
  Support: 98
NAME:
  Precision: 0.9692
  Recall: 0.9844
  F1: 0.9767
  Support: 64
SKILLS:
  Precision: 0.7237
  Recall: 0.7237
  F1: 0.7237
  Support: 76
Train Loss: 0.0108
Val Loss: 0.2007, Accuracy: 0.9672, Precision: 0.7926, Recall: 0.8265, F1: 0.8092
New best model saved with F1: 0.8092

Epoch 26/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0040]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.63it/s]



Token-level accuracy: 0.9667

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7541
  Recall: 0.8364
  F1: 0.7931
  Support: 55
COMPANY:
  Precision: 0.7143
  Recall: 0.7383
  F1: 0.7261
  Support: 149
DEGREE:
  Precision: 0.7193
  Recall: 0.8542
  F1: 0.7810
  Support: 48
DESIGNATION:
  Precision: 0.7468
  Recall: 0.8042
  F1: 0.7744
  Support: 143
EMAIL:
  Precision: 0.8649
  Recall: 0.9143
  F1: 0.8889
  Support: 70
LOCATION:
  Precision: 0.8624
  Recall: 0.9592
  F1: 0.9082
  Support: 98
NAME:
  Precision: 0.9403
  Recall: 0.9844
  F1: 0.9618
  Support: 64
SKILLS:
  Precision: 0.7037
  Recall: 0.7500
  F1: 0.7261
  Support: 76
Train Loss: 0.0094
Val Loss: 0.2037, Accuracy: 0.9667, Precision: 0.7794, Recall: 0.8393, F1: 0.8082
No improvement for 1 epochs

Epoch 27/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0026]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.76it/s]



Token-level accuracy: 0.9644

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7541
  Recall: 0.8364
  F1: 0.7931
  Support: 55
COMPANY:
  Precision: 0.7000
  Recall: 0.7047
  F1: 0.7023
  Support: 149
DEGREE:
  Precision: 0.7321
  Recall: 0.8542
  F1: 0.7885
  Support: 48
DESIGNATION:
  Precision: 0.7877
  Recall: 0.8042
  F1: 0.7958
  Support: 143
EMAIL:
  Precision: 0.8889
  Recall: 0.9143
  F1: 0.9014
  Support: 70
LOCATION:
  Precision: 0.8774
  Recall: 0.9490
  F1: 0.9118
  Support: 98
NAME:
  Precision: 0.9692
  Recall: 0.9844
  F1: 0.9767
  Support: 64
SKILLS:
  Precision: 0.7067
  Recall: 0.6974
  F1: 0.7020
  Support: 76
Train Loss: 0.0087
Val Loss: 0.2100, Accuracy: 0.9644, Precision: 0.7934, Recall: 0.8250, F1: 0.8089
No improvement for 2 epochs

Epoch 28/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0170]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.73it/s]



Token-level accuracy: 0.9643

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7419
  Recall: 0.8364
  F1: 0.7863
  Support: 55
COMPANY:
  Precision: 0.7000
  Recall: 0.7517
  F1: 0.7249
  Support: 149
DEGREE:
  Precision: 0.7736
  Recall: 0.8542
  F1: 0.8119
  Support: 48
DESIGNATION:
  Precision: 0.7548
  Recall: 0.8182
  F1: 0.7852
  Support: 143
EMAIL:
  Precision: 0.8451
  Recall: 0.8571
  F1: 0.8511
  Support: 70
LOCATION:
  Precision: 0.8532
  Recall: 0.9490
  F1: 0.8986
  Support: 98
NAME:
  Precision: 0.9545
  Recall: 0.9844
  F1: 0.9692
  Support: 64
SKILLS:
  Precision: 0.6463
  Recall: 0.6974
  F1: 0.6709
  Support: 76
Train Loss: 0.0086
Val Loss: 0.2174, Accuracy: 0.9643, Precision: 0.7718, Recall: 0.8321, F1: 0.8008
No improvement for 3 epochs

Epoch 29/30


Training: 100%|██████████| 113/113 [01:33<00:00,  1.21it/s, loss=0.0154]
Evaluating: 100%|██████████| 8/8 [00:04<00:00,  1.75it/s]



Token-level accuracy: 0.9679

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7419
  Recall: 0.8364
  F1: 0.7863
  Support: 55
COMPANY:
  Precision: 0.6879
  Recall: 0.7248
  F1: 0.7059
  Support: 149
DEGREE:
  Precision: 0.7778
  Recall: 0.8750
  F1: 0.8235
  Support: 48
DESIGNATION:
  Precision: 0.7338
  Recall: 0.7902
  F1: 0.7609
  Support: 143
EMAIL:
  Precision: 0.7412
  Recall: 0.9000
  F1: 0.8129
  Support: 70
LOCATION:
  Precision: 0.8407
  Recall: 0.9694
  F1: 0.9005
  Support: 98
NAME:
  Precision: 0.9403
  Recall: 0.9844
  F1: 0.9618
  Support: 64
SKILLS:
  Precision: 0.5870
  Recall: 0.7105
  F1: 0.6429
  Support: 76
Train Loss: 0.0107
Val Loss: 0.1891, Accuracy: 0.9679, Precision: 0.7449, Recall: 0.8307, F1: 0.7855
No improvement for 4 epochs
Early stopping triggered


In [None]:
# Restore the best checkpoint and report loss, accuracy and entity metrics on the test set
print("\nLoading best model for testing...")
# map_location keeps the load working when the checkpoint was written from a
# different device than the one in use now (e.g. saved on GPU, reloaded on CPU).
model.load_state_dict(torch.load(best_model_path, map_location=device))
test_loss, test_report, test_precision, test_recall, test_f1, test_accuracy = evaluate(test_dataloader)

print("\nFinal Test Results:")
print(f"Loss: {test_loss:.4f}")
print(f"Accuracy: {test_accuracy:.4f}")
print(f"Precision: {test_precision:.4f}")
print(f"Recall: {test_recall:.4f}")
print(f"F1 Score: {test_f1:.4f}")



Loading best model for testing...


Evaluating: 100%|██████████| 9/9 [00:05<00:00,  1.76it/s]



Token-level accuracy: 0.9493

Entity-level metrics:
COLLEGE NAME:
  Precision: 0.7818
  Recall: 0.8269
  F1: 0.8037
  Support: 52
COMPANY:
  Precision: 0.8103
  Recall: 0.8952
  F1: 0.8507
  Support: 105
DEGREE:
  Precision: 0.9038
  Recall: 0.9216
  F1: 0.9126
  Support: 51
DESIGNATION:
  Precision: 0.8468
  Recall: 0.8607
  F1: 0.8537
  Support: 122
EMAIL:
  Precision: 0.8533
  Recall: 0.9014
  F1: 0.8767
  Support: 71
LOCATION:
  Precision: 0.8839
  Recall: 0.8839
  F1: 0.8839
  Support: 112
NAME:
  Precision: 1.0000
  Recall: 0.9706
  F1: 0.9851
  Support: 68
SKILLS:
  Precision: 0.4182
  Recall: 0.6389
  F1: 0.5055
  Support: 36

Final Test Results:
Loss: 0.3211
Accuracy: 0.9493
Precision: 0.8260
Recall: 0.8768
F1 Score: 0.8506


In [None]:
# Two stacked panels saved to disk: loss curves on top, validation accuracy below.
fig, (loss_ax, acc_ax) = plt.subplots(2, 1, figsize=(12, 10))

# Loss panel
loss_ax.plot(range(1, len(train_losses) + 1), train_losses, 'b-', label='Training Loss')
loss_ax.plot(range(1, len(val_losses) + 1), val_losses, 'r-', label='Validation Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_ylabel('Loss')

# Accuracy panel
acc_ax.plot(range(1, len(val_accuracies) + 1), val_accuracies, 'g-', label='Validation Accuracy')
acc_ax.set_title('Validation Accuracy')
acc_ax.set_ylabel('Accuracy')

# Decoration common to both panels
for panel in (loss_ax, acc_ax):
    panel.set_xlabel('Epoch')
    panel.legend()
    panel.grid(True)

fig.tight_layout()
fig.savefig('training_metrics.png')
plt.close(fig)  # the figure is only written to file, not displayed inline

In [None]:
# Prediction function: one NER label per whitespace-delimited token
def predict_entities(text):
    """Predict an NER tag for every whitespace-delimited token of ``text``.

    The text is split on whitespace, encoded with the notebook-level
    ``tokenizer`` (padded/truncated to ``MAX_LEN``) and passed through
    ``model``. Each word receives the label predicted for its first
    sub-token.

    Args:
        text: Raw input string.

    Returns:
        A list of ``(token, label)`` tuples, one per whitespace token and in
        input order. Words that have no sub-token in the encoded input (cut
        off by truncation, or producing no sub-tokens at all) are labelled
        ``'O'`` rather than being dropped or shifting later labels. Returns
        ``[]`` when ``text`` contains no non-whitespace characters.
    """
    tokens = re.findall(r'\S+', text)
    if not tokens:
        # Nothing to tag; skip the model call entirely
        return []

    model.eval()

    # Prepare input for the model
    inputs = tokenizer(
        tokens,
        is_split_into_words=True,
        padding='max_length',
        truncation=True,
        max_length=MAX_LEN,
        return_tensors='pt'
    )

    # Move inputs to device
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    # Get model predictions for the single sequence in the batch
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        label_ids = torch.argmax(outputs.logits, dim=2)[0].tolist()

    # Record the label of the first sub-token of each word, keyed by word
    # index so that a word without sub-tokens cannot misalign later words.
    word_labels = {}
    for position, word_idx in enumerate(inputs.word_ids(0)):  # batch index 0
        if word_idx is None or word_idx in word_labels:
            continue
        word_labels[word_idx] = id2label[label_ids[position]]

    # Combine tokens and predictions; unseen words default to 'O'
    return [(token, word_labels.get(index, 'O')) for index, token in enumerate(tokens)]

In [None]:
# Collapse per-token BIO tags into entity mentions, keyed by entity type
def group_entities(predictions):
    """Merge consecutive tagged tokens into space-joined entity mentions.

    Args:
        predictions: Iterable of ``(token, label)`` pairs where ``label`` is
            ``'O'``, ``'B-<TYPE>'`` or ``'I-<TYPE>'``.

    Returns:
        Dict mapping each entity type to the list of its mentions, in the
        order the mentions were completed. ``'O'`` and ``'B-'`` close the
        open mention; an ``'I-'`` tag extends it only when the type matches,
        otherwise it opens a new mention. Labels with any other shape are
        ignored.
    """
    grouped = {}
    active_type = None
    buffer = []

    def _flush():
        # Store the open mention, if any, under its entity type.
        if active_type:
            grouped.setdefault(active_type, []).append(' '.join(buffer))

    for token, label in predictions:
        if label == 'O':
            if active_type:
                _flush()
                active_type, buffer = None, []
            continue

        prefix, entity_type = label[:2], label[2:]

        # Continuation of the mention that is currently open
        if prefix == 'I-' and entity_type == active_type:
            buffer.append(token)
            continue

        # 'B-' always opens a new mention; so does an 'I-' of a different type
        if prefix in ('B-', 'I-'):
            _flush()
            active_type, buffer = entity_type, [token]

    # A mention still open at the end of the sequence
    if active_type and buffer:
        _flush()

    return grouped

In [None]:
# Sample resume text used as the input for the prediction demo below.
# It has a contact header, a one-line summary, and Education, Experience,
# Projects and Skills sections.
example_text = """John Smith
Email: john.smith@example.com
Phone: +1-234-567-8901
LinkedIn: linkedin.com/in/johnsmith

A passionate software engineer with 3 years of experience in full-stack development. Proficient in JavaScript, React, Node.js, and Python.

Education
B.Tech in Computer Science, Indian Institute of Technology Madras, 2018 - 2022
CGPA: 8.7/10

Experience
Software Engineer at Google, Bangalore — Jul 2022 to Present
- Built scalable REST APIs using Node.js and Express
- Led a team of 3 in migrating the dashboard to React

Software Engineering Intern at Microsoft, Hyderabad — May 2021 to Jul 2021
- Developed internal tools using Python and Flask
- Improved application load time by 30%

Projects
Resume Parser using BERT — Built a tool to extract structured information from resumes
E-commerce Web App — MERN stack application with authentication and payment gateway integration

Skills
JavaScript, React, Node.js, Express, Python, Flask, MongoDB, PostgreSQL, Git, Docker
"""

In [None]:
# Tag the sample resume. Only its first 100 characters are echoed as a preview;
# the full text is what gets passed to the model.
print("\nRunning prediction on comprehensive example:")
print(f"Text: {example_text[:100]}...")
# predictions: list of (token, label) pairs, reused by the cells that follow.
predictions = predict_entities(example_text)


Running prediction on comprehensive example:
Text: John Smith
Email: john.smith@example.com
Phone: +1-234-567-8901
LinkedIn: linkedin.com/in/johnsmith
...


In [None]:
# Quick look at the raw tagger output: the first 20 (token, label) pairs,
# before any grouping into entity mentions.
print("\nPredicted Entities (first 20):")
for token, label in predictions[:20]:
    print(f"{token} -> {label}")


Predicted Entities (first 20):
John -> B-NAME
Smith -> I-NAME
Email: -> O
john.smith@example.com -> B-NAME
Phone: -> I-EMAIL
+1-234-567-8901 -> I-EMAIL
LinkedIn: -> O
linkedin.com/in/johnsmith -> I-EMAIL
A -> O
passionate -> O
software -> O
engineer -> O
with -> O
3 -> O
years -> O
of -> O
experience -> O
in -> O
full-stack -> O
development. -> O


In [None]:
# Merge the per-token tags into entity mentions and print one line per
# entity type with all of its mentions.
print("\nGrouped Entities:")
grouped = group_entities(predictions)
for entity_type, mentions in grouped.items():
    print(f"{entity_type}: {mentions}")


Grouped Entities:
NAME: ['John Smith', 'john.smith@example.com']
EMAIL: ['Phone: +1-234-567-8901', 'linkedin.com/in/johnsmith']
SKILLS: ['JavaScript,', 'React,', 'Node.js,', 'Python.', 'JavaScript, React, Node.js, Express, Python, Flask, MongoDB, PostgreSQL, Git, Docker']
DEGREE: ['B.Tech in Computer Science,']
COLLEGE NAME: ['Indian Institute of Technology Madras,']
DESIGNATION: ['Software Engineer', 'Software Engineering']
