# Winograd Schema Challenge Benchmark

This notebook implements a comprehensive Winograd Schema Challenge benchmark with detailed error analysis using Hugging Face models.

## Research Focus
Testing the hypothesis: **Can restricting knowledge (ε-masking) actually help reasoning tasks like Winograd schemas?**


## 1. Installation and Imports


In [None]:
%pip install transformers torch datasets spacy nltk scikit-learn matplotlib seaborn pandas numpy
!python -m spacy download en_core_web_sm


In [None]:
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import spacy
import re
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import List, Dict, Tuple, Optional
import warnings
# Install a blanket "ignore" filter so warnings are not shown for the rest of
# the session (presumably to keep notebook output uncluttered). Note that this
# also hides deprecation warnings from the libraries imported above.
warnings.filterwarnings('ignore')

# Sanity check: reaching this point means every import in this cell succeeded;
# also report which PyTorch build is in use.
print("All packages imported successfully!")
print(f"PyTorch version: {torch.__version__}")


## 2. Winograd Schema Dataset


In [None]:
class WinogradDataset:
    """
    In-memory collection of hand-written Winograd-style pronoun-resolution items.

    Every item is a plain dict with the keys ``id``, ``text``, ``question``,
    ``options`` (two candidate referents), ``answer`` (one of the options),
    ``reasoning`` and ``difficulty`` ("easy", "medium" or "hard").
    The items are stored, in definition order, on ``self.schemas``.
    """

    @staticmethod
    def _entry(*, schema_id, text, question, options, answer, reasoning, difficulty) -> Dict:
        """Assemble one schema record, keeping the canonical key order."""
        return {
            "id": schema_id,
            "text": text,
            "question": question,
            "options": options,
            "answer": answer,
            "reasoning": reasoning,
            "difficulty": difficulty,
        }

    def __init__(self):
        """Build a fresh list of schema records owned by this instance."""
        entry = self._entry
        self.schemas = [
            entry(
                schema_id="wsc_001",
                text="The trophy doesn't fit in the brown suitcase because it is too large.",
                question="What is too large?",
                options=["trophy", "suitcase"],
                answer="trophy",
                reasoning="The pronoun 'it' refers to the trophy, and the trophy is what doesn't fit because it's too large.",
                difficulty="medium",
            ),
            entry(
                schema_id="wsc_002",
                text="The city councilmen refused the demonstrators a permit because they feared violence.",
                question="Who feared violence?",
                options=["city councilmen", "demonstrators"],
                answer="city councilmen",
                reasoning="The pronoun 'they' refers to the city councilmen who refused the permit because they (the councilmen) feared violence.",
                difficulty="hard",
            ),
            entry(
                schema_id="wsc_003",
                text="The large ball crashed right through the table because it was made of Styrofoam.",
                question="What was made of Styrofoam?",
                options=["large ball", "table"],
                answer="table",
                reasoning="The pronoun 'it' refers to the table, which was made of Styrofoam, allowing the ball to crash through it.",
                difficulty="medium",
            ),
            entry(
                schema_id="wsc_004",
                text="The delivery truck zoomed by the school bus because it was going so fast.",
                question="What was going so fast?",
                options=["delivery truck", "school bus"],
                answer="delivery truck",
                reasoning="The pronoun 'it' refers to the delivery truck, which was going so fast that it zoomed by the school bus.",
                difficulty="easy",
            ),
            entry(
                schema_id="wsc_005",
                text="Sam tried to paint a picture of her dog, but she couldn't because it was too hairy.",
                question="What was too hairy?",
                options=["picture", "dog"],
                answer="dog",
                reasoning="The pronoun 'it' refers to the dog, which was too hairy for Sam to paint a good picture of.",
                difficulty="medium",
            ),
            entry(
                schema_id="wsc_006",
                text="The lawyer asked the witness a question, but he was unable to provide a good answer.",
                question="Who was unable to provide a good answer?",
                options=["lawyer", "witness"],
                answer="witness",
                reasoning="The pronoun 'he' refers to the witness, who was unable to provide a good answer to the lawyer's question.",
                difficulty="easy",
            ),
            entry(
                schema_id="wsc_007",
                text="The firemen arrived after the house had burned down, but they were still able to save the cat.",
                question="Who were still able to save the cat?",
                options=["firemen", "house"],
                answer="firemen",
                reasoning="The pronoun 'they' refers to the firemen, who were still able to save the cat despite arriving after the house burned down.",
                difficulty="medium",
            ),
            entry(
                schema_id="wsc_008",
                text="The student asked the teacher a question, but she didn't know the answer.",
                question="Who didn't know the answer?",
                options=["student", "teacher"],
                answer="teacher",
                reasoning="The pronoun 'she' refers to the teacher, who didn't know the answer to the student's question.",
                difficulty="easy",
            ),
        ]

    def get_all_schemas(self) -> List[Dict]:
        """Return the instance's schema list itself (not a copy)."""
        return self.schemas

    def get_by_difficulty(self, difficulty: str) -> List[Dict]:
        """Return a new list of the schemas whose ``difficulty`` equals the given label.

        The comparison is exact (case-sensitive); an unknown label yields ``[]``.
        """
        return [item for item in self.schemas if item["difficulty"] == difficulty]

    def get_schema_by_id(self, schema_id: str) -> Optional[Dict]:
        """Return the first schema whose ``id`` equals ``schema_id``, or ``None`` if absent."""
        matches = (item for item in self.schemas if item["id"] == schema_id)
        return next(matches, None)

# Build the dataset once for the rest of the notebook, then print a short
# summary: the total number of schemas followed by the count per difficulty.
winograd_data = WinogradDataset()
print(f"Winograd dataset loaded with {len(winograd_data.get_all_schemas())} schemas")
for label, level in (("Easy", "easy"), ("Medium", "medium"), ("Hard", "hard")):
    print(f"{label}: {len(winograd_data.get_by_difficulty(level))}")


atch! 