# Knowledge vs Reasoning Separation: Proof of Concept

This notebook implements a proof of concept for separating knowledge and reasoning in language models through ε-masking and structural hints.

## Research Question
How does limiting contextual knowledge with ε-masking and structural hints affect performance across tasks that differ in their demands for knowledge and reasoning?

## Method Overview
- Train small models from scratch with controlled knowledge exposure
- Apply ε-masking (ε ∈ {0.05, 0.15, 0.30, 0.50}) while preserving structural hints
- Compare performance across 4 task quadrants
- Focus on trends rather than absolute performance


## 1. Installation and Imports


In [1]:
# Install required packages
%pip install transformers torch datasets spacy nltk scikit-learn matplotlib seaborn pandas numpy
!python -m spacy download en_core_web_sm


Collecting spacy
  Downloading spacy-3.8.7-cp311-cp311-macosx_11_0_arm64.whl.metadata (27 kB)
Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy)
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy)
  Downloading spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)
Collecting murmurhash<1.1.0,>=0.28.0 (from spacy)
  Downloading murmurhash-1.0.13-cp311-cp311-macosx_11_0_arm64.whl.metadata (2.2 kB)
Collecting cymem<2.1.0,>=2.0.2 (from spacy)
  Downloading cymem-2.0.11-cp311-cp311-macosx_11_0_arm64.whl.metadata (8.5 kB)
Collecting preshed<3.1.0,>=3.0.2 (from spacy)
  Downloading preshed-3.0.10-cp311-cp311-macosx_11_0_arm64.whl.metadata (2.4 kB)
Collecting thinc<8.4.0,>=8.3.4 (from spacy)
  Downloading thinc-8.3.6-cp311-cp311-macosx_11_0_arm64.whl.metadata (15 kB)
Collecting wasabi<1.2.0,>=0.9.1 (from spacy)
  Downloading wasabi-1.1.3-py3-none

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import transformers
from transformers import GPT2Config, GPT2LMHeadModel, GPT2Tokenizer, AutoTokenizer, AutoModelForCausalLM
import spacy
import nltk
import re
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, f1_score
from datasets import load_dataset
import json
from typing import List, Dict, Tuple, Optional
import warnings
warnings.filterwarnings('ignore')

# Download the NLTK 'punkt' and 'stopwords' resources (quiet=True silences the log).
for nltk_resource in ('punkt', 'stopwords'):
    nltk.download(nltk_resource, quiet=True)

# Report library versions so results can be tied to an environment.
print("All packages imported successfully!")
print(f"PyTorch version: {torch.__version__}")
print(f"Transformers version: {transformers.__version__}")


  from .autonotebook import tqdm as notebook_tqdm


All packages imported successfully!
PyTorch version: 2.2.2
Transformers version: 4.44.2


## 2. Configuration and Setup


In [4]:
# Configuration
class Config:
    """Central collection of the notebook's tunable settings (class-level constants)."""

    # --- Model / optimisation hyper-parameters ---
    MODEL_SIZE = "gpt2"  # Will use GPT-2 small for POC
    VOCAB_SIZE = 50257
    MAX_LENGTH = 512
    BATCH_SIZE = 8
    LEARNING_RATE = 5e-4
    NUM_EPOCHS = 3

    # --- ε-masking levels swept by the experiment ---
    EPSILON_VALUES = [0.05, 0.15, 0.30, 0.50]

    # --- Structural hints that are exempt from masking ---
    PRESERVE_FUNCTION_WORDS = True
    PRESERVE_PUNCTUATION = True
    PRESERVE_NER = True

    # --- Compute device: prefer CUDA, then Apple MPS, otherwise CPU ---
    if torch.cuda.is_available():
        DEVICE = torch.device('cuda')
    elif torch.backends.mps.is_available():
        DEVICE = torch.device('mps')
    else:
        DEVICE = torch.device('cpu')

    # --- Random seed ---
    SEED = 42

# Seed torch, numpy and the stdlib RNG (in that order) from the shared Config value.
for seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    seed_rng(Config.SEED)

print(f"Using device: {Config.DEVICE}")
print("Configuration loaded successfully!")


Using device: mps
Configuration loaded successfully!


## 3. Data Preprocessing for Structural Hints


In [None]:
# Load the spaCy English pipeline used for NER.
# When the model is not installed, `nlp` is left as None and a hint is printed.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Please install spaCy English model: python -m spacy download en_core_web_sm")
    nlp = None
else:
    print("spaCy model loaded successfully!")

# NLTK's English stop-word list serves as the set of "function words".
from nltk.corpus import stopwords
function_words = set(stopwords.words('english'))

# Punctuation patterns: one character class matching a single punctuation mark.
# The earlier pattern doubled every backslash inside a raw string, which closed the
# character class early and left a regex that only matched "<char>{}]" sequences.
punctuation_pattern = re.compile(r'[.,!?;:"\-\[\](){}]')

class StructuralHintProcessor:
    """
    Extracts structural hints (function words, punctuation, named entities) from
    text and applies ε-masking to the remaining whitespace-delimited tokens.

    Uses the notebook-level names `function_words`, `punctuation_pattern` and
    `nlp`; when `nlp` is None, entity extraction is skipped.
    """

    def __init__(self, preserve_function_words=True, preserve_punctuation=True, preserve_ner=True):
        """
        Args:
            preserve_function_words: never mask tokens found in `function_words`.
            preserve_punctuation: never mask tokens made up only of punctuation.
            preserve_ner: replace entity tokens with typed placeholders.
        """
        self.preserve_function_words = preserve_function_words
        self.preserve_punctuation = preserve_punctuation
        self.preserve_ner = preserve_ner
        self.function_words = function_words if preserve_function_words else set()

    def extract_structural_hints(self, text: str) -> Dict:
        """
        Extract structural hints from text.

        Returns:
            dict with keys:
              'function_words': set of tokens whose letters-only lowercase form
                                is a function word,
              'punctuation':    set of punctuation characters found in the text,
              'ner_entities':   mapping entity text -> typed placeholder
                                such as "<GPE_0>",
              'tokens':         the text split on whitespace.
        """
        hints = {
            'function_words': set(),
            'punctuation': set(),
            'ner_entities': {},
            'tokens': text.split()
        }

        # Extract function words
        if self.preserve_function_words:
            for token in hints['tokens']:
                clean_token = re.sub(r'[^a-zA-Z]', '', token.lower())
                if clean_token in self.function_words:
                    hints['function_words'].add(token)

        # Extract punctuation
        if self.preserve_punctuation:
            for match in punctuation_pattern.finditer(text):
                hints['punctuation'].add(match.group())

        # Extract NER entities
        if self.preserve_ner and nlp is not None:
            entities = hints['ner_entities']
            for ent in nlp(text).ents:
                # A repeated entity keeps the placeholder it was first given, so
                # the same surface form always maps to the same ID in this text.
                if ent.text not in entities:
                    entities[ent.text] = f"<{ent.label_}_{len(entities)}>"

        return hints

    def _is_structural(self, token: str) -> bool:
        """Return True when `token` is a function word or pure punctuation to keep."""
        if self.preserve_function_words:
            clean_token = re.sub(r'[^a-zA-Z]', '', token.lower())
            if clean_token in self.function_words:
                return True
        if self.preserve_punctuation:
            # Only tokens consisting solely of punctuation are protected; a word
            # that merely starts with a quote or bracket is still maskable.
            return all(punctuation_pattern.fullmatch(ch) for ch in token)
        return False

    def apply_epsilon_masking(self, text: str, epsilon: float) -> str:
        """
        Apply ε-masking to text while preserving structural hints.

        Each whitespace token is handled as follows:
          * a token equal to a detected entity is replaced by its placeholder;
          * a function word / punctuation token is kept unchanged;
          * any other token becomes '<MASK>' with probability `epsilon`.

        Entity lookup is by exact token, so multi-word entities and entities
        with attached punctuation (e.g. "Eiffel,") are not replaced.

        Returns:
            The processed tokens joined with single spaces.
        """
        hints = self.extract_structural_hints(text)
        entity_placeholders = hints['ner_entities'] if self.preserve_ner else {}
        masked_tokens = []

        for token in hints['tokens']:
            if token in entity_placeholders:
                # Emit the placeholder only; the original token must not follow it.
                masked_tokens.append(entity_placeholders[token])
            elif self._is_structural(token):
                masked_tokens.append(token)
            elif random.random() < epsilon:
                masked_tokens.append('<MASK>')
            else:
                masked_tokens.append(token)

        return ' '.join(masked_tokens)

# Initialize processor
# Module-level instance configured from the Config preservation flags.
processor = StructuralHintProcessor(
    preserve_function_words=Config.PRESERVE_FUNCTION_WORDS,
    preserve_punctuation=Config.PRESERVE_PUNCTUATION,
    preserve_ner=Config.PRESERVE_NER
)

print("Structural hint processor initialized!")


## 4. Tokenization and Dimensionality Strategy

**Key Challenges:**
1. **Vocabulary Expansion**: NER entities become typed IDs (e.g., `<PERSON_0>`, `<ORG_1>`)
2. **Dynamic Vocabulary**: Each document may introduce new entity IDs
3. **Consistent Mapping**: Same entity should get same ID across contexts
4. **Mask Token**: Need special `<MASK>` token in vocabulary

**Solution Approach:**
- Use a **hybrid tokenization** strategy
- **Base vocabulary**: Standard GPT-2 tokenizer (50,257 tokens)
- **Extended vocabulary**: Add special tokens for masking and entity types
- **Entity mapping**: Maintain consistent entity-to-ID mapping per document
- **Fixed sequence length**: Pad/truncate to consistent length


In [None]:
class EnhancedTokenizer:
    """
    GPT-2 tokenizer wrapper that runs structural-hint processing / ε-masking
    before encoding and pads or truncates every sequence to a fixed length.

    Note: typed entity placeholders produced by the processor (e.g. `<GPE_0>`)
    are not registered as vocabulary items here, so they are encoded as
    ordinary sub-word pieces.
    """

    def __init__(self, base_model="gpt2", max_length=512, processor=None):
        """
        Args:
            base_model: name or path passed to `GPT2Tokenizer.from_pretrained`.
            max_length: fixed length of the sequences returned by
                `tokenize_with_structural_hints`.
            processor: object exposing `apply_epsilon_masking(text, epsilon)`.
                When None, a default `StructuralHintProcessor()` is created on
                first use and then reused.
        """
        # Load base tokenizer; GPT-2 has no pad token, so EOS doubles as padding.
        self.base_tokenizer = GPT2Tokenizer.from_pretrained(base_model)
        self.base_tokenizer.pad_token = self.base_tokenizer.eos_token

        # `add_special_tokens` only accepts the tokenizer's predefined attribute
        # names as keys, so the custom markers go under 'additional_special_tokens'.
        special_tokens = {
            'mask_token': '<MASK>',
            'additional_special_tokens': [
                '<ENT>', '</ENT>', '<PERSON>', '<ORG>', '<LOC>', '<DATE>', '<MISC>'
            ],
        }
        self.base_tokenizer.add_special_tokens(special_tokens)

        # Store token mappings
        self.mask_token_id = self.base_tokenizer.convert_tokens_to_ids('<MASK>')
        self.pad_token_id = self.base_tokenizer.pad_token_id

        self.max_length = max_length
        self.processor = processor

        # Entity ID counter (per document)
        self.entity_counter = 0
        self.entity_mapping = {}

        print(f"Enhanced tokenizer initialized with {len(self.base_tokenizer)} tokens")
        print(f"Special tokens: {special_tokens}")

    def reset_entity_mapping(self):
        """Reset entity mapping for new document."""
        self.entity_counter = 0
        self.entity_mapping = {}

    def create_entity_token(self, entity_type: str) -> str:
        """Create a unique entity token for this document and advance the counter."""
        entity_token = f"<{entity_type}_{self.entity_counter}>"
        self.entity_counter += 1
        return entity_token

    def tokenize_with_structural_hints(self, text: str, epsilon: float = 0.0) -> Dict:
        """
        Mask `text` with the structural-hint processor, encode it, and pad or
        truncate to `self.max_length`.

        Returns:
            dict with 'input_ids' and 'attention_mask' (1-D long tensors of
            length `max_length`; the mask is 1 for real tokens, 0 for padding),
            plus 'original_text', 'masked_text' and 'epsilon'.
        """
        # Reset entity mapping for this text
        self.reset_entity_mapping()

        # Build the default processor once instead of on every call.
        if self.processor is None:
            self.processor = StructuralHintProcessor()
        masked_text = self.processor.apply_epsilon_masking(text, epsilon)

        # Encode, then truncate to the fixed length.
        token_ids = self.base_tokenizer.encode(masked_text, add_special_tokens=True)
        token_ids = list(token_ids[:self.max_length])

        # Right-pad with the pad token; the attention mask marks real tokens.
        real_length = len(token_ids)
        padding_length = self.max_length - real_length
        attention_mask = [1] * real_length + [0] * padding_length
        token_ids = token_ids + [self.pad_token_id] * padding_length

        return {
            'input_ids': torch.tensor(token_ids, dtype=torch.long),
            'attention_mask': torch.tensor(attention_mask, dtype=torch.long),
            'original_text': text,
            'masked_text': masked_text,
            'epsilon': epsilon
        }

    def decode_tokens(self, token_ids: List[int]) -> str:
        """Decode token IDs back to text (special tokens are kept)."""
        return self.base_tokenizer.decode(token_ids, skip_special_tokens=False)

# Build the notebook-wide tokenizer instance.
enhanced_tokenizer = EnhancedTokenizer()
print("Enhanced tokenizer ready!")

# Show how one sentence looks after masking at a few ε levels.
demo_text = "The Eiffel Tower in Paris was designed by Gustave Eiffel, a French engineer."
print(f"\nDemo text: {demo_text}")

for epsilon in [0.1, 0.3, 0.5]:
    result = enhanced_tokenizer.tokenize_with_structural_hints(demo_text, epsilon)
    print(
        f"\nε={epsilon}:",
        f"Masked text: {result['masked_text']}",
        f"Token count: {len(result['input_ids'])}",
        f"Input shape: {result['input_ids'].shape}",
        sep="\n",
    )


## 6. Andrej Karpathy-Style Nano GPT with Masking


In [None]:
class LayerNorm(nn.Module):
    """Layer normalisation with a learnable scale and an optional learnable shift."""

    def __init__(self, ndim, bias):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(ndim))
        # With bias disabled the attribute is None, which F.layer_norm accepts.
        if bias:
            self.bias = nn.Parameter(torch.zeros(ndim))
        else:
            self.bias = None

    def forward(self, input):
        # Normalise over the trailing dimension(s) given by the weight's shape.
        return F.layer_norm(
            input,
            normalized_shape=self.weight.shape,
            weight=self.weight,
            bias=self.bias,
            eps=1e-5,
        )

class CausalSelfAttention(nn.Module):
    """
    Multi-head causal self-attention.

    `forward` accepts an `attention_mask` argument but does not use it; only the
    causal (lower-triangular) constraint is applied.
    """

    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        # One fused projection yields queries, keys and values for every head.
        self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
        # Projection applied to the concatenated head outputs.
        self.c_proj = nn.Linear(config.n_embd, config.n_embd, bias=config.bias)
        # Dropout on the attention weights and on the block output.
        self.attn_dropout = nn.Dropout(config.dropout)
        self.resid_dropout = nn.Dropout(config.dropout)
        self.n_head = config.n_head
        self.n_embd = config.n_embd
        self.dropout = config.dropout
        # Use the fused kernel when this PyTorch build provides it (>= 2.0).
        self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
        if not self.flash:
            print("WARNING: using slow attention. Flash Attention requires PyTorch >= 2.0")
            # Lower-triangular mask so each position attends only to the left.
            causal = torch.tril(torch.ones(config.block_size, config.block_size))
            self.register_buffer("bias", causal.view(1, 1, config.block_size, config.block_size))

    def forward(self, x, attention_mask=None):
        batch, seq_len, width = x.size()
        head_dim = width // self.n_head

        def split_heads(t):
            # (B, T, C) -> (B, n_head, T, head_dim)
            return t.view(batch, seq_len, self.n_head, head_dim).transpose(1, 2)

        q, k, v = (split_heads(part) for part in self.c_attn(x).split(self.n_embd, dim=2))

        if self.flash:
            # Fused attention; dropout is active only in training mode.
            drop_p = self.dropout if self.training else 0
            y = torch.nn.functional.scaled_dot_product_attention(
                q, k, v, attn_mask=None, dropout_p=drop_p, is_causal=True
            )
        else:
            # Manual path: scaled scores, causal mask, softmax, dropout, weighted sum.
            scores = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
            scores = scores.masked_fill(self.bias[:, :, :seq_len, :seq_len] == 0, float('-inf'))
            weights = self.attn_dropout(F.softmax(scores, dim=-1))
            y = weights @ v

        # (B, n_head, T, head_dim) -> (B, T, C), then output projection + dropout.
        merged = y.transpose(1, 2).contiguous().view(batch, seq_len, width)
        return self.resid_dropout(self.c_proj(merged))

class MLP(nn.Module):
    """Position-wise feed-forward net: expand 4x, GELU, project back, dropout."""

    def __init__(self, config):
        super().__init__()
        hidden_width = 4 * config.n_embd
        self.c_fc = nn.Linear(config.n_embd, hidden_width, bias=config.bias)
        self.gelu = nn.GELU()
        self.c_proj = nn.Linear(hidden_width, config.n_embd, bias=config.bias)
        self.dropout = nn.Dropout(config.dropout)

    def forward(self, x):
        hidden = self.gelu(self.c_fc(x))
        return self.dropout(self.c_proj(hidden))

class Block(nn.Module):
    """Pre-norm transformer layer: residual self-attention, then residual MLP."""

    def __init__(self, config):
        super().__init__()
        self.ln_1 = LayerNorm(config.n_embd, bias=config.bias)
        self.attn = CausalSelfAttention(config)
        self.ln_2 = LayerNorm(config.n_embd, bias=config.bias)
        self.mlp = MLP(config)

    def forward(self, x, attention_mask=None):
        # Each sub-layer sees the normalised input; its output is added back to x.
        attended = self.attn(self.ln_1(x), attention_mask)
        x = x + attended
        transformed = self.mlp(self.ln_2(x))
        return x + transformed

class GPTConfig:
    """Hyper-parameter container for the GPT model; unknown keyword arguments are ignored."""

    def __init__(self, **kwargs):
        # Field name -> value used when the caller does not supply one.
        defaults = {
            # Model parameters
            'block_size': 512,
            'vocab_size': 50257,
            'n_layer': 6,
            'n_head': 8,
            'n_embd': 256,
            'dropout': 0.1,
            'bias': False,
            # Masking parameters
            'mask_aware': True,
            'epsilon': 0.0,
        }
        for field, fallback in defaults.items():
            setattr(self, field, kwargs.get(field, fallback))

class NanoGPT(nn.Module):
    """
    Andrej Karpathy-style Nano GPT with masking awareness.
    Adapted for knowledge vs reasoning separation research.

    When `config.mask_aware` is set, a learned embedding marking whether each
    input token is the `<MASK>` token is added to the hidden states.
    """

    def __init__(self, config):
        super().__init__()
        assert config.vocab_size is not None
        assert config.block_size is not None
        self.config = config
        # Id of the `<MASK>` token. Taken from the config when it carries one;
        # otherwise resolved lazily from the notebook-level tokenizer (see
        # `_resolve_mask_token_id`).
        self.mask_token_id = getattr(config, 'mask_token_id', None)

        self.transformer = nn.ModuleDict(dict(
            wte = nn.Embedding(config.vocab_size, config.n_embd),
            wpe = nn.Embedding(config.block_size, config.n_embd),
            drop = nn.Dropout(config.dropout),
            h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
            ln_f = LayerNorm(config.n_embd, bias=config.bias),
        ))
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        # Mask-aware components
        if config.mask_aware:
            self.mask_embedding = nn.Embedding(2, config.n_embd)  # 0: normal, 1: masked
            self.mask_classifier = nn.Linear(config.n_embd, 2)  # Predict if token is masked

        # init all weights
        self.apply(self._init_weights)
        # apply special scaled init to the residual projections, per GPT-2 paper
        for pn, p in self.named_parameters():
            if pn.endswith('c_proj.weight'):
                torch.nn.init.normal_(p, mean=0.0, std=0.02/math.sqrt(2 * config.n_layer))

        # report number of parameters
        n_params = sum(p.numel() for p in self.parameters())
        print(f"number of parameters: {n_params/1e6:.2f}M")

    def _init_weights(self, module):
        """Normal(0, 0.02) init for Linear/Embedding weights; zero Linear biases."""
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def _resolve_mask_token_id(self):
        """Return the `<MASK>` token id, caching the notebook-global fallback."""
        if self.mask_token_id is None:
            # Backward-compatible fallback for configs without `mask_token_id`.
            self.mask_token_id = enhanced_tokenizer.mask_token_id
        return self.mask_token_id

    def forward(self, idx, targets=None, attention_mask=None, epsilon=None):
        """
        Args:
            idx: (b, t) tensor of token ids, with t <= block_size.
            targets: optional (b, t) tensor of target ids; positions equal to -1
                are ignored by the loss.
            attention_mask: forwarded to the blocks (which currently ignore it).
            epsilon: accepted for interface compatibility; not used.

        Returns:
            (logits, loss). With targets: logits for every position and the
            cross-entropy loss. Without targets: logits for the last position
            only (shape (b, 1, vocab)) and loss None.
        """
        device = idx.device
        b, t = idx.size()
        assert t <= self.config.block_size, f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
        pos = torch.arange(0, t, dtype=torch.long, device=device) # shape (t)

        # forward the GPT model itself
        tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
        pos_emb = self.transformer.wpe(pos) # position embeddings of shape (t, n_embd)
        x = self.transformer.drop(tok_emb + pos_emb)

        # Add mask-aware embeddings whenever enabled. This no longer depends on
        # `attention_mask` being passed, so `generate()` (which passes none) sees
        # the same input representation the model was trained with.
        if self.config.mask_aware:
            # 1 for `<MASK>` tokens, 0 for everything else
            mask_indicators = (idx == self._resolve_mask_token_id()).long()
            x = x + self.mask_embedding(mask_indicators)

        for block in self.transformer.h:
            x = block(x, attention_mask)
        x = self.transformer.ln_f(x)

        if targets is not None:
            # if we are given some desired targets also calculate the loss
            logits = self.lm_head(x)
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1)
        else:
            # inference-time mini-optimization: only forward the lm_head on the very last position
            logits = self.lm_head(x[:, [-1], :]) # note: using list [-1] to preserve the time dim
            loss = None

        return logits, loss

    def crop_block_size(self, block_size):
        """Shrink the maximum context length in place (e.g. to save memory)."""
        assert block_size <= self.config.block_size
        self.config.block_size = block_size
        self.transformer.wpe.weight = nn.Parameter(self.transformer.wpe.weight[:block_size])
        for block in self.transformer.h:
            # The causal-mask buffer only exists on the non-flash attention path.
            if hasattr(block.attn, 'bias'):
                block.attn.bias = block.attn.bias[:,:,:block_size,:block_size]

    def configure_optimizers(self, weight_decay, learning_rate, betas, device_type):
        """
        Build an AdamW optimizer with weight decay applied only to parameters
        with two or more dimensions. The fused implementation is used when it
        is available and `device_type == 'cuda'`.
        """
        # start with all of the candidate parameters
        param_dict = {pn: p for pn, p in self.named_parameters()}
        # filter out those that do not require gradients
        param_dict = {pn: p for pn, p in param_dict.items() if p.requires_grad}
        # create optim groups. Any parameters that is 2D will be weight decayed, otherwise no.
        # i.e. all weight tensors in matmuls + embeddings decay, all biases and layernorms don't.
        decay_params = [p for n, p in param_dict.items() if p.dim() >= 2]
        nodecay_params = [p for n, p in param_dict.items() if p.dim() < 2]
        optim_groups = [
            {'params': decay_params, 'weight_decay': weight_decay},
            {'params': nodecay_params, 'weight_decay': 0.0}
        ]
        num_decay_params = sum(p.numel() for p in decay_params)
        num_nodecay_params = sum(p.numel() for p in nodecay_params)
        print(f"num decayed parameter tensors: {len(decay_params)}, with {num_decay_params:,} parameters")
        print(f"num non-decayed parameter tensors: {len(nodecay_params)}, with {num_nodecay_params:,} parameters")
        # Create AdamW optimizer and use the fused version if it is available
        fused_available = 'fused' in inspect.signature(torch.optim.AdamW).parameters
        use_fused = fused_available and device_type == 'cuda'
        extra_args = dict(fused=True) if use_fused else dict()
        optimizer = torch.optim.AdamW(optim_groups, lr=learning_rate, betas=betas, **extra_args)
        print(f"using fused AdamW: {use_fused}")

        return optimizer

    @torch.no_grad()
    def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None):
        """
        Take a conditioning sequence of indices idx (LongTensor of shape (b,t)) and complete
        the sequence max_new_tokens times, feeding the predictions back into the model each time.
        Most likely you'll want to make sure to be in model.eval() mode of operation for this.
        """
        for _ in range(max_new_tokens):
            # if the sequence context is growing too long we must crop it at block_size
            idx_cond = idx if idx.size(1) <= self.config.block_size else idx[:, -self.config.block_size:]
            # forward the model to get the logits for the index in the sequence
            logits, _ = self(idx_cond)
            # pluck the logits at the final step and scale by desired temperature
            logits = logits[:, -1, :] / temperature
            # optionally crop the logits to only the top k options
            if top_k is not None:
                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                logits[logits < v[:, [-1]]] = -float('Inf')
            # apply softmax to convert logits to (normalized) probabilities
            probs = F.softmax(logits, dim=-1)
            # sample from the distribution
            idx_next = torch.multinomial(probs, num_samples=1)
            # append sampled index to the running sequence and continue
            idx = torch.cat((idx, idx_next), dim=1)

        return idx

# Import required modules for the GPT implementation
import math
import inspect
import torch.nn.functional as F

print("NanoGPT implementation ready!")


## 7. Training Procedure with ε-Masking


In [None]:
class MaskedDataset(Dataset):
    """
    Dataset for training with ε-masking.

    Each item is produced by `tokenizer.tokenize_with_structural_hints`, so the
    masking is applied when the item is fetched.
    """

    def __init__(self, texts: List[str], tokenizer: "EnhancedTokenizer", epsilon: float):
        self.texts = texts
        self.tokenizer = tokenizer
        self.epsilon = epsilon

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """
        Returns a dict with 'input_ids', 'targets', 'attention_mask',
        'original_text', 'masked_text' and 'epsilon'.

        'targets' holds the next-token id for each position. Positions whose
        next token is padding (and the final position) are set to -1, the value
        the model's loss ignores, so padding does not contribute to training.
        """
        text = self.texts[idx]

        # Tokenize with structural hints and masking
        result = self.tokenizer.tokenize_with_structural_hints(text, self.epsilon)

        input_ids = result['input_ids']
        attention_mask = result['attention_mask']

        # Targets are the inputs shifted left by one; anything that would
        # predict a padding token is replaced by the ignore value -1.
        targets = torch.full_like(input_ids, -1)
        next_is_real = attention_mask[1:].bool()
        targets[:-1] = torch.where(
            next_is_real, input_ids[1:], torch.full_like(input_ids[1:], -1)
        )

        return {
            'input_ids': input_ids,
            'targets': targets,
            'attention_mask': attention_mask,
            'original_text': result['original_text'],
            'masked_text': result['masked_text'],
            'epsilon': self.epsilon
        }

def train_model_with_epsilon_masking(texts: List[str], epsilon: float, num_epochs: int = 3) -> Tuple[NanoGPT, EnhancedTokenizer]:
    """
    Train a NanoGPT model with specific epsilon masking level.

    Args:
        texts: training sentences; must not be empty.
        epsilon: masking probability handed to the dataset.
        num_epochs: number of passes over `texts`.

    Returns:
        (model, enhanced_tokenizer): the trained model and the notebook-level
        tokenizer that was used to encode the data.

    Raises:
        ValueError: if `texts` is empty.
    """
    if len(texts) == 0:
        raise ValueError("texts must contain at least one training example")

    # "\n" (not "\\n") so a blank line is printed rather than a literal backslash-n.
    print(f"\n=== Training model with ε={epsilon} ===")

    # Create model configuration
    config = GPTConfig(
        block_size=512,
        vocab_size=len(enhanced_tokenizer.base_tokenizer),
        n_layer=6,
        n_head=8,
        n_embd=256,
        dropout=0.1,
        bias=False,
        mask_aware=True,
        epsilon=epsilon
    )
    # Record the `<MASK>` id on the config so it travels with the model settings.
    config.mask_token_id = enhanced_tokenizer.mask_token_id

    # Initialize model
    model = NanoGPT(config)
    model.to(Config.DEVICE)

    # Create dataset and dataloader
    dataset = MaskedDataset(texts, enhanced_tokenizer, epsilon)
    dataloader = DataLoader(dataset, batch_size=Config.BATCH_SIZE, shuffle=True)

    # Setup optimizer
    optimizer = model.configure_optimizers(
        weight_decay=0.1,
        learning_rate=Config.LEARNING_RATE,
        betas=(0.9, 0.95),
        device_type='cuda' if torch.cuda.is_available() else 'cpu'
    )

    # Training loop
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        num_batches = 0

        for batch in dataloader:
            input_ids = batch['input_ids'].to(Config.DEVICE)
            targets = batch['targets'].to(Config.DEVICE)
            attention_mask = batch['attention_mask'].to(Config.DEVICE)

            optimizer.zero_grad()

            # Forward pass
            logits, loss = model(input_ids, targets=targets, attention_mask=attention_mask, epsilon=epsilon)

            # Backward pass
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        avg_loss = total_loss / num_batches
        print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")

    return model, enhanced_tokenizer

def _score_option(model, tokenizer, prompt, prompt_len, option, device):
    """Mean log-likelihood of the option's tokens given the prompt (higher is better)."""
    encoded = tokenizer.tokenize_with_structural_hints(f"{prompt} {option}", epsilon=0.0)
    input_ids = encoded['input_ids']
    attention_mask = encoded['attention_mask']
    full_len = int(attention_mask.sum())

    # No option tokens survive (e.g. the prompt alone fills the context window).
    if prompt_len < 1 or full_len <= prompt_len:
        return float('-inf')

    # Only positions that predict an option token get a real target; -1 is the
    # value the model's loss ignores.
    targets = torch.full_like(input_ids, -1)
    targets[prompt_len - 1:full_len - 1] = input_ids[prompt_len:full_len]

    with torch.no_grad():
        _, loss = model(
            input_ids.unsqueeze(0).to(device),
            targets=targets.unsqueeze(0).to(device),
            attention_mask=attention_mask.unsqueeze(0).to(device),
        )
    return -loss.item()


def evaluate_model_on_tasks(model: "NanoGPT", tokenizer: "EnhancedTokenizer", tasks: List[Dict]) -> Dict:
    """
    Evaluate model on benchmark tasks.

    Every answer option is scored by the mean log-likelihood the model assigns
    to the option's tokens when they follow "<context> <question> Answer:".
    The highest-scoring option is the prediction. (Previously the score was the
    largest softmax probability at the final, padded position, which did not
    measure how likely the option itself was.)

    The prompt length is measured by tokenizing the prompt on its own, which
    assumes the prompt tokenizes the same way as a prefix of prompt + option.

    Args:
        model: called as `model(input_ids, targets=..., attention_mask=...)` and
            expected to return `(logits, loss)`.
        tokenizer: object providing `tokenize_with_structural_hints`.
        tasks: dicts with keys 'id', 'context', 'question', 'options',
            'answer' and 'reasoning_type'.

    Returns:
        dict with 'accuracy', 'avg_confidence', 'total_tasks', 'correct_tasks'
        and 'detailed_results'. 'confidence' is the softmax-normalised share of
        the chosen option among the option scores. For an empty task list the
        rates are 0.0.

    Raises:
        ValueError: if a task has no options.
    """
    model.eval()
    # Run on whatever device the model lives on instead of a notebook global.
    first_param = next(iter(model.parameters()), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    results = []

    for task in tasks:
        options = task['options']
        correct_answer = task['answer']
        if not options:
            raise ValueError(f"Task {task['id']!r} has no answer options")

        # Create input prompt
        prompt = f"{task['context']} {task['question']} Answer:"
        prompt_encoding = tokenizer.tokenize_with_structural_hints(prompt, epsilon=0.0)
        prompt_len = int(prompt_encoding['attention_mask'].sum())

        option_scores = [
            _score_option(model, tokenizer, prompt, prompt_len, option, device)
            for option in options
        ]

        # Select highest scoring option (first one wins ties)
        predicted_idx = max(range(len(options)), key=option_scores.__getitem__)
        predicted_answer = options[predicted_idx]
        is_correct = predicted_answer == correct_answer

        # Normalise scores into a distribution over the options.
        if any(score != float('-inf') for score in option_scores):
            option_probs = torch.softmax(torch.tensor(option_scores), dim=0).tolist()
        else:
            option_probs = [1.0 / len(options)] * len(options)

        results.append({
            'task_id': task['id'],
            'predicted_answer': predicted_answer,
            'correct_answer': correct_answer,
            'is_correct': is_correct,
            'confidence': option_probs[predicted_idx],
            'reasoning_type': task['reasoning_type']
        })

    total_tasks = len(results)
    correct_tasks = sum(r['is_correct'] for r in results)
    if total_tasks:
        accuracy = correct_tasks / total_tasks
        avg_confidence = sum(r['confidence'] for r in results) / total_tasks
    else:
        accuracy = 0.0
        avg_confidence = 0.0

    return {
        'accuracy': accuracy,
        'avg_confidence': avg_confidence,
        'total_tasks': total_tasks,
        'correct_tasks': correct_tasks,
        'detailed_results': results
    }

print("Training and evaluation functions ready!")


## 8. Run the Complete Experiment


In [None]:
# Sample training data (in practice, you'd use a larger dataset like WikiText-103)
sample_texts = [
    "The cat sat on the mat and purred contentedly.",
    "Paris is the capital of France and a major European city.",
    "All birds can fly, but penguins are flightless birds.",
    "The Eiffel Tower was built in Paris by Gustave Eiffel.",
    "If it rains, the ground gets wet from the water.",
    "Shakespeare wrote many famous plays including Hamlet and Macbeth.",
    "The sun rises in the east and sets in the west.",
    "John has three apples and gives one to Mary.",
    "The trophy doesn't fit in the brown suitcase because it is too large.",
    "The city councilmen refused the demonstrators a permit because they feared violence.",
    "All birds can fly. Penguins are birds. Can penguins fly?",
    "If it rains, the ground gets wet. The ground is wet. Did it rain?"
]

print(f"Training with {len(sample_texts)} sample texts")
print("Sample texts:")
for i, text in enumerate(sample_texts[:3]):
    print(f"{i+1}. {text}")

# Run the complete experiment.
# Banner strings use "\n" (a real newline); the earlier "\\n" printed a literal backslash-n.
print("\n" + "="*60)
print("STARTING KNOWLEDGE VS REASONING SEPARATION EXPERIMENT")
print("="*60)

# results[epsilon][quadrant_name] -> dict returned by evaluate_model_on_tasks
results = {}

for epsilon in Config.EPSILON_VALUES:
    print(f"\n{'='*20} ε={epsilon} {'='*20}")

    # Train model with this epsilon
    model, tokenizer = train_model_with_epsilon_masking(sample_texts, epsilon, num_epochs=2)

    # Evaluate on all quadrants
    epsilon_results = {}

    # NOTE(review): `benchmark_tasks` is not defined in the visible part of this
    # notebook — confirm the cell that defines it runs before this one.
    for quadrant_name, tasks in benchmark_tasks.items():
        print(f"\nEvaluating {quadrant_name}...")
        quadrant_result = evaluate_model_on_tasks(model, tokenizer, tasks)
        epsilon_results[quadrant_name] = quadrant_result
        print(f"Accuracy: {quadrant_result['accuracy']:.3f} ({quadrant_result['correct_tasks']}/{quadrant_result['total_tasks']})")

    results[epsilon] = epsilon_results

print("\n" + "="*60)
print("EXPERIMENT COMPLETED!")
print("="*60)


## 9. Results Analysis and Visualization


In [None]:
def _plot_metric_by_quadrant(results: Dict, metric: str, marker: str, ylabel: str, title: str) -> None:
    """Draw one line per quadrant of `metric` against epsilon and show the figure."""
    epsilon_values = list(results.keys())
    # Quadrant names are taken from the first epsilon's entry.
    quadrants = list(results[epsilon_values[0]].keys())

    plt.figure(figsize=(12, 8))

    for quadrant in quadrants:
        values = [results[eps][quadrant][metric] for eps in epsilon_values]
        plt.plot(epsilon_values, values, marker=marker, label=quadrant.replace('_', ' ').title(), linewidth=2)

    plt.xlabel('Epsilon (Masking Level)', fontsize=12)
    plt.ylabel(ylabel, fontsize=12)
    plt.title(title, fontsize=14, fontweight='bold')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()


def visualize_results(results: Dict):
    """
    Create visualizations of the experimental results.

    Two figures are shown, each with one line per quadrant plotted against the
    epsilon values: accuracy first, then average confidence.

    Args:
        results: Mapping ``epsilon -> quadrant name -> metrics dict``. The
            ``'accuracy'`` and ``'avg_confidence'`` entries of each metrics
            dict are plotted.

    Returns:
        None. An empty ``results`` prints a notice and draws nothing (it used
        to raise IndexError when looking up the first epsilon).
    """
    if not results:
        print("No results to visualize.")
        return

    _plot_metric_by_quadrant(
        results, 'accuracy', 'o',
        'Accuracy', 'Performance vs. Knowledge Masking Level',
    )
    _plot_metric_by_quadrant(
        results, 'avg_confidence', 's',
        'Average Confidence', 'Model Confidence vs. Knowledge Masking Level',
    )

def analyze_results(results: Dict):
    """
    Analyze and print key findings from the results.

    For every quadrant this prints the accuracy at each epsilon, the best
    epsilon, and whether any epsilon beats the baseline. It then prints the
    hypothesis check for the "light_knowledge_heavy_reasoning" quadrant when
    that quadrant is present.

    Args:
        results: Mapping ``epsilon -> quadrant name -> metrics dict``; only the
            ``'accuracy'`` entry of each metrics dict is read. The first
            epsilon in insertion order is used as the baseline, and quadrant
            names are taken from that first entry.

    Returns:
        None. All output goes to stdout. An empty ``results`` prints a short
        notice instead of raising IndexError.
    """
    # Real newlines here: the earlier "\\n" literals printed a visible
    # backslash-n rather than a blank line.
    print("\n" + "="*60)
    print("EXPERIMENTAL RESULTS ANALYSIS")
    print("="*60)

    if not results:
        print("No results to analyze.")
        return

    epsilon_values = list(results.keys())
    quadrants = list(results[epsilon_values[0]].keys())

    for quadrant in quadrants:
        print(f"\n{quadrant.replace('_', ' ').title()}:")
        print("-" * 40)

        accuracies = [results[eps][quadrant]['accuracy'] for eps in epsilon_values]

        for eps, acc in zip(epsilon_values, accuracies):
            print(f"ε={eps}: Accuracy={acc:.3f}")

        # Find best epsilon for this quadrant (first one wins on ties)
        best_eps_idx = int(np.argmax(accuracies))
        best_eps = epsilon_values[best_eps_idx]
        best_acc = accuracies[best_eps_idx]

        print(f"Best performance: ε={best_eps} (Accuracy={best_acc:.3f})")

        # Check if masking helps. The baseline is the first epsilon that was
        # run, i.e. the lightest masking when epsilons are given in ascending
        # order (presumably 0.05 here; confirm against Config.EPSILON_VALUES).
        baseline_acc = accuracies[0]
        max_acc = max(accuracies)

        if max_acc > baseline_acc:
            improvement = max_acc - baseline_acc
            print(f"✓ Masking helps! Improvement: +{improvement:.3f}")
        else:
            print("✗ Masking does not help in this quadrant")

    # Overall analysis
    print("\n" + "="*60)
    print("KEY FINDINGS")
    print("="*60)

    # Check if light knowledge + heavy reasoning benefits from masking
    lk_hr_quadrant = "light_knowledge_heavy_reasoning"
    if lk_hr_quadrant in quadrants:
        lk_hr_accs = [results[eps][lk_hr_quadrant]['accuracy'] for eps in epsilon_values]
        baseline_lk_hr = lk_hr_accs[0]
        best_lk_hr = max(lk_hr_accs)

        if best_lk_hr > baseline_lk_hr:
            print("✓ HYPOTHESIS SUPPORTED: Light Knowledge + Heavy Reasoning benefits from masking!")
            print(f"  Improvement: {best_lk_hr - baseline_lk_hr:.3f}")
        else:
            print("✗ HYPOTHESIS NOT SUPPORTED: Masking does not help Light Knowledge + Heavy Reasoning")

    print("\nExperiment completed successfully!")

# Plot and summarise the experiment, but only when the experiment cell has
# already produced a non-empty `results` mapping in this session.
if not globals().get('results'):
    print("No results available yet. Please run the experiment first.")
else:
    visualize_results(results)
    analyze_results(results)


## 10. Demo: Structural Hint Processing


In [None]:
# Demo the structural hint processing: show the same sentence after masking at
# three increasing epsilon levels.
demo_text = "The Eiffel Tower in Paris was designed by Gustave Eiffel, a French engineer."

print("Original text:")
print(demo_text)
print()

for epsilon in [0.1, 0.3, 0.5]:
    masked_text = processor.apply_epsilon_masking(demo_text, epsilon)
    print(f"ε={epsilon}: {masked_text}")

# "\n" (not "\\n") so the note is preceded by a blank line rather than a
# literal backslash-n.
print("\nNote: Function words, punctuation, and NER entities are preserved while other content is masked.")


## 11. Next Steps and Extensions


In [None]:
# Print the follow-up work list for turning this proof of concept into a full
# study. The text is static; nothing here depends on earlier cells.
print("""
NEXT STEPS FOR FULL RESEARCH:

1. SCALE UP:
   - Use larger datasets (WikiText-103, OSCAR)
   - Train for more epochs with proper validation
   - Use larger models (GPT-2 small/medium)

2. IMPROVE EVALUATION:
   - Add more benchmark tasks per quadrant
   - Use proper evaluation metrics (BLEU, ROUGE, etc.)
   - Implement few-shot evaluation

3. ENHANCE MASKING:
   - Experiment with different masking strategies
   - Add semantic masking (mask related concepts)
   - Implement dynamic epsilon based on task difficulty

4. ANALYSIS:
   - Statistical significance testing
   - Error analysis and case studies
   - Ablation studies on structural hints

5. DATASETS TO CONSIDER:
   - WikiText-103 for training
   - Winograd Schema Challenge
   - SQuAD 2.0
   - GLUE benchmark tasks
   - CommonsenseQA

6. TOKENIZATION IMPROVEMENTS:
   - Dynamic vocabulary expansion for entities
   - Better handling of multi-word entities
   - Consistent entity mapping across documents

This POC demonstrates the core methodology. The full research would involve
more extensive experiments and analysis.
""")


## 12. Enhanced Winograd Schema Challenge with Hugging Face Models


In [None]:
class WinogradEvaluator:
    """
    Evaluates Hugging Face causal language models on Winograd Schema Challenge
    items, with per-option scores and an error breakdown.

    Each answer option is scored by the length-normalised likelihood the model
    assigns to the option's tokens when they directly follow the prompt.
    """
    
    def __init__(self, model_name: str = "gpt2"):
        """
        Load the tokenizer and causal LM for `model_name` and move the model to
        CUDA when available, otherwise to the CPU.
        """
        self.model_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        
        # Set pad token if not present
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)
        # Inference only: keep dropout disabled so repeated scoring is deterministic.
        self.model.eval()
        
        print(f"Model {model_name} loaded on {self.device}")
    
    def _score_option(self, prompt: str, option: str) -> tuple:
        """
        Score `option` as a continuation of `prompt`.

        Returns:
            ``(option_probability, max_probability)`` where
            ``option_probability`` is the geometric mean of the probabilities
            the model gives to each option token in context (0.0 for a blank
            option), and ``max_probability`` is the largest next-token
            probability right after the prompt.
        """
        # Prompt and option are tokenised separately so the boundary between
        # them is known exactly; the leading space makes the option start on a
        # word boundary after "Answer:".
        prompt_ids = list(self.tokenizer.encode(prompt))
        if option.strip():
            option_ids = list(self.tokenizer.encode(" " + option, add_special_tokens=False))
        else:
            option_ids = []

        # If the sequence is too long, drop tokens from the START of the prompt
        # so the option being scored is never cut off. At least one prompt
        # token is kept as context.
        max_len = getattr(self.tokenizer, "model_max_length", None)
        if max_len:
            overflow = len(prompt_ids) + len(option_ids) - max_len
            if overflow > 0:
                prompt_ids = prompt_ids[min(overflow, len(prompt_ids) - 1):]

        input_ids = torch.tensor([prompt_ids + option_ids], dtype=torch.long, device=self.device)
        with torch.no_grad():
            logits = self.model(input_ids=input_ids).logits
        log_probs = torch.log_softmax(logits[0].float(), dim=-1)

        # The logits at position t are the distribution over token t + 1, so
        # the first option token is predicted at the last prompt position.
        first_answer_pos = len(prompt_ids) - 1
        max_prob = log_probs[first_answer_pos].max().exp().item()
        if not option_ids:
            return 0.0, max_prob

        positions = torch.arange(
            first_answer_pos, first_answer_pos + len(option_ids), device=log_probs.device
        )
        targets = input_ids[0, len(prompt_ids):]
        token_log_probs = log_probs[positions, targets]
        # Averaging the log-probabilities keeps the score in [0, 1] and avoids
        # penalising options merely for having more tokens.
        return token_log_probs.mean().exp().item(), max_prob
    
    def evaluate_schema(self, schema: Dict, epsilon: float = 0.0) -> Dict:
        """
        Evaluate a single Winograd schema.

        Args:
            schema: Dict with the keys "id", "text", "question", "options",
                "answer", "difficulty" and "reasoning". The prediction is
                correct when the chosen option equals ``schema["answer"]``.
            epsilon: Masking level. When greater than 0 the schema text is
                passed through ``processor.apply_epsilon_masking`` first.

        Returns:
            Dict with the prediction, per-option scores/details and the schema
            metadata. "confidence" is the score of the winning option.

        Raises:
            ValueError: If the schema has no options.
        """
        if not schema["options"]:
            raise ValueError(f"Schema {schema['id']} has no answer options to score")

        # Apply masking if epsilon > 0
        if epsilon > 0:
            masked_text = processor.apply_epsilon_masking(schema["text"], epsilon)
        else:
            masked_text = schema["text"]
        
        # Create prompt
        prompt = f"{masked_text} {schema['question']} Answer:"
        
        # Get scores for each option
        option_scores = []
        option_details = []
        
        for option in schema["options"]:
            option_prob, max_prob = self._score_option(prompt, option)
            option_scores.append(option_prob)
            option_details.append({
                "option": option,
                "probability": option_prob,
                "max_probability": max_prob,
                "prompt": f"{prompt} {option}"
            })
        
        # Determine prediction (first option wins on ties)
        predicted_idx = int(np.argmax(option_scores))
        predicted_answer = schema["options"][predicted_idx]
        is_correct = predicted_answer == schema["answer"]
        
        return {
            "schema_id": schema["id"],
            "original_text": schema["text"],
            "masked_text": masked_text,
            "epsilon": epsilon,
            "question": schema["question"],
            "options": schema["options"],
            "correct_answer": schema["answer"],
            "predicted_answer": predicted_answer,
            "is_correct": is_correct,
            "option_scores": option_scores,
            "option_details": option_details,
            "confidence": max(option_scores),
            "difficulty": schema["difficulty"],
            "reasoning": schema["reasoning"]
        }
    
    def evaluate_all_schemas(self, schemas: List[Dict], epsilon: float = 0.0) -> List[Dict]:
        """
        Evaluate all schemas with given epsilon, returning one result dict per
        schema in input order.
        """
        return [self.evaluate_schema(schema, epsilon) for schema in schemas]
    
    def get_error_analysis(self, results: List[Dict]) -> Dict:
        """
        Analyze errors and provide detailed insights.

        Args:
            results: Result dicts as produced by ``evaluate_schema``; the keys
                "is_correct", "confidence" and "difficulty" are read.

        Returns:
            Dict with overall counts and accuracy, the correct/incorrect result
            lists, mean confidence for each group (0 when the group is empty),
            and an error breakdown for the "easy", "medium" and "hard"
            difficulties. An empty ``results`` gives accuracy 0.0 rather than
            raising ZeroDivisionError.
        """
        errors = [r for r in results if not r["is_correct"]]
        correct = [r for r in results if r["is_correct"]]
        total = len(results)
        
        analysis = {
            "total_schemas": total,
            "correct_count": len(correct),
            "error_count": len(errors),
            "accuracy": len(correct) / total if total else 0.0,
            "errors_by_difficulty": {},
            "error_details": errors,
            "correct_details": correct,
            "avg_confidence_correct": np.mean([r["confidence"] for r in correct]) if correct else 0,
            "avg_confidence_errors": np.mean([r["confidence"] for r in errors]) if errors else 0
        }
        
        # Analyze errors by difficulty; other difficulty labels are not broken out.
        for difficulty in ["easy", "medium", "hard"]:
            difficulty_errors = [e for e in errors if e["difficulty"] == difficulty]
            difficulty_total = sum(1 for r in results if r["difficulty"] == difficulty)
            analysis["errors_by_difficulty"][difficulty] = {
                "error_count": len(difficulty_errors),
                "total_count": difficulty_total,
                "error_rate": len(difficulty_errors) / difficulty_total if difficulty_total > 0 else 0
            }
        
        return analysis

# Initialize evaluator: constructing it loads the pretrained "gpt2" tokenizer
# and model and prints which device the model was placed on.
evaluator = WinogradEvaluator("gpt2")
print("Winograd evaluator ready!")


## 13. Easy Access Functions for Winograd Analysis



In [None]:
def run_winograd_experiment(model_name: str = "gpt2", epsilon_values: List[float] = [0.0, 0.1, 0.3, 0.5]) -> Dict:
    """
    Run complete Winograd experiment with multiple epsilon values.
    
    A fresh WinogradEvaluator is created for `model_name`, every schema from
    ``winograd_data.get_all_schemas()`` is evaluated at each masking level, and
    a summary is printed per level.
    
    Args:
        model_name: Hugging Face model name
        epsilon_values: List of masking levels to test. The default list is
            only read, never modified.
    
    Returns:
        Dictionary keyed by epsilon; each value holds "schema_results" (the
        per-schema result list) and "analysis" (the error analysis dict).
    """
    print(f"Running Winograd experiment with {model_name}")
    print(f"Testing epsilon values: {epsilon_values}")
    
    # Initialize evaluator
    evaluator = WinogradEvaluator(model_name)
    
    # Get all schemas
    schemas = winograd_data.get_all_schemas()
    
    results = {}
    
    for epsilon in epsilon_values:
        # "\n" gives a blank line before each banner; the old "\\n" printed a
        # literal backslash-n.
        print(f"\n{'='*20} Testing ε={epsilon} {'='*20}")
        
        # Evaluate all schemas
        schema_results = evaluator.evaluate_all_schemas(schemas, epsilon)
        
        # Get error analysis
        analysis = evaluator.get_error_analysis(schema_results)
        
        results[epsilon] = {
            "schema_results": schema_results,
            "analysis": analysis
        }
        
        print(f"Accuracy: {analysis['accuracy']:.3f} ({analysis['correct_count']}/{analysis['total_schemas']})")
        print(f"Errors: {analysis['error_count']}")
        
        # Show error breakdown by difficulty
        for difficulty, stats in analysis["errors_by_difficulty"].items():
            print(f"  {difficulty}: {stats['error_count']}/{stats['total_count']} ({stats['error_rate']:.3f})")
    
    return results

def get_detailed_errors(results: Dict, epsilon: float) -> List[Dict]:
    """
    Collect the misclassified schemas recorded for one masking level.
    
    Args:
        results: Results from run_winograd_experiment
        epsilon: Epsilon value to analyze
    
    Returns:
        One dict per error, restricted to the reporting fields listed below.
        When `epsilon` has no entry in `results`, a message is printed and an
        empty list is returned.
    """
    if epsilon not in results:
        print(f"No results found for epsilon={epsilon}")
        return []
    
    # Fields copied from each stored error record, in output order.
    report_fields = (
        "schema_id",
        "original_text",
        "masked_text",
        "question",
        "correct_answer",
        "predicted_answer",
        "difficulty",
        "confidence",
        "reasoning",
        "option_scores",
        "option_details",
    )
    
    return [
        {field: record[field] for field in report_fields}
        for record in results[epsilon]["analysis"]["error_details"]
    ]

def compare_epsilon_performance(results: Dict) -> pd.DataFrame:
    """
    Compare performance across different epsilon values.
    
    Args:
        results: Results from run_winograd_experiment
    
    Returns:
        DataFrame with one row per epsilon and the columns: epsilon, accuracy,
        correct_count, error_count, avg_confidence_correct,
        avg_confidence_errors, easy_accuracy, medium_accuracy, hard_accuracy.
        A difficulty with no schemas gets NaN accuracy (it used to be reported
        as 1.0). Empty `results` gives an empty frame that still has these
        columns.
    """
    difficulties = ["easy", "medium", "hard"]
    columns = [
        "epsilon",
        "accuracy",
        "correct_count",
        "error_count",
        "avg_confidence_correct",
        "avg_confidence_errors",
    ] + [f"{difficulty}_accuracy" for difficulty in difficulties]
    
    comparison_data = []
    
    for epsilon, result in results.items():
        analysis = result["analysis"]
        
        row = {
            "epsilon": epsilon,
            "accuracy": analysis["accuracy"],
            "correct_count": analysis["correct_count"],
            "error_count": analysis["error_count"],
            "avg_confidence_correct": analysis["avg_confidence_correct"],
            "avg_confidence_errors": analysis["avg_confidence_errors"]
        }
        
        # Add difficulty-specific accuracies
        for difficulty in difficulties:
            difficulty_stats = analysis["errors_by_difficulty"][difficulty]
            # An empty bucket has error_rate 0, which would read as a perfect
            # score; report it as missing instead. If "total_count" is absent
            # the bucket is assumed non-empty, matching the earlier behaviour.
            if difficulty_stats.get("total_count", 1) == 0:
                row[f"{difficulty}_accuracy"] = float("nan")
            else:
                row[f"{difficulty}_accuracy"] = 1 - difficulty_stats["error_rate"]
        
        comparison_data.append(row)
    
    # Explicit columns keep the schema stable even when there are no rows.
    return pd.DataFrame(comparison_data, columns=columns)

def visualize_winograd_results(results: Dict):
    """
    Create visualizations of the Winograd results.

    Draws a 2x2 figure: overall accuracy, accuracy by difficulty, mean
    confidence for correct vs. wrong predictions, and the error count, each
    against the masking level.

    Args:
        results: Results from run_winograd_experiment

    Returns:
        The comparison DataFrame from compare_epsilon_performance that the
        plots were drawn from.
    """
    df = compare_epsilon_performance(results)
    
    # Create subplots
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    
    # Overall accuracy
    axes[0, 0].plot(df['epsilon'], df['accuracy'], marker='o', linewidth=2)
    axes[0, 0].set_xlabel('Epsilon (Masking Level)')
    axes[0, 0].set_ylabel('Accuracy')
    axes[0, 0].set_title('Winograd Accuracy vs. Masking Level')
    axes[0, 0].grid(True, alpha=0.3)
    
    # Accuracy by difficulty
    for difficulty in ['easy', 'medium', 'hard']:
        axes[0, 1].plot(df['epsilon'], df[f'{difficulty}_accuracy'], marker='o', label=difficulty.title(), linewidth=2)
    axes[0, 1].set_xlabel('Epsilon (Masking Level)')
    axes[0, 1].set_ylabel('Accuracy')
    axes[0, 1].set_title('Winograd Accuracy by Difficulty')
    axes[0, 1].legend()
    axes[0, 1].grid(True, alpha=0.3)
    
    # Confidence comparison
    axes[1, 0].plot(df['epsilon'], df['avg_confidence_correct'], marker='o', label='Correct', linewidth=2)
    axes[1, 0].plot(df['epsilon'], df['avg_confidence_errors'], marker='s', label='Errors', linewidth=2)
    axes[1, 0].set_xlabel('Epsilon (Masking Level)')
    axes[1, 0].set_ylabel('Average Confidence')
    axes[1, 0].set_title('Confidence: Correct vs. Errors')
    axes[1, 0].legend()
    axes[1, 0].grid(True, alpha=0.3)
    
    # Error count. The bars sit on categorical (string) positions: with the
    # numeric epsilons as x, matplotlib's default bar width of 0.8 data units
    # is wider than the gap between neighbouring epsilons, so the bars overlap.
    epsilon_labels = [str(eps) for eps in df['epsilon']]
    axes[1, 1].bar(epsilon_labels, df['error_count'], alpha=0.7)
    axes[1, 1].set_xlabel('Epsilon (Masking Level)')
    axes[1, 1].set_ylabel('Number of Errors')
    axes[1, 1].set_title('Error Count by Masking Level')
    axes[1, 1].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()
    
    return df

# Announce the helper functions defined in this cell. "\n" (rather than the
# earlier "\\n") puts a blank line before the list instead of printing a
# literal backslash-n.
print("Easy access functions ready!")
print("\nAvailable functions:")
print("- run_winograd_experiment(model_name, epsilon_values)")
print("- get_detailed_errors(results, epsilon)")
print("- compare_epsilon_performance(results)")
print("- visualize_winograd_results(results)")
