### Install libraries

In [None]:
!pip install minicons
!pip install nltk
!pip install matplotlib

### Import libraries and functions

In [None]:
# Import "scorer" module from the minicons library, which provides functionalities for computing 
# log-probabilities and surprisal using pretrained transformer models
from minicons import scorer

# Load helper functions from the file helper_functions.py
from helper_functions import plot_surprisals

### Initialize the model

In [None]:
# Identifier of the pretrained model to load.
model_name = "HuggingFaceTB/SmolLM2-135M"

# Load the specified model. IncrementalLMScorer is a wrapper around any causal
# (autoregressive) language model accessible via Hugging Face's transformers.
lmScorer = scorer.IncrementalLMScorer(model_name)

### Generate surprisal for tokens


In [None]:
# Garden-path sentence: the relative clause is reduced, so "raced" is temporarily ambiguous.
sent_garden_path_amb = "The horse raced past the barn fell."
# Same sentence with the relative clause made explicit ("that was"), removing the ambiguity.
sent_garden_path_unamb = "The horse that was raced past the barn fell."

In [None]:
# Token-level scores for the ambiguous garden-path sentence.
# surprisal=True / prob=False request surprisal values instead of probabilities.
# bos_token=True and bow_correction=True are forwarded to minicons unchanged
# (presumably: prepend a beginning-of-sequence token and apply the
# beginning-of-word correction -- confirm against the minicons documentation).
token_surprisal_amb = lmScorer.token_score(
    sent_garden_path_amb, surprisal=True, prob=False,
    bos_token=True, bow_correction=True,
)

# Bare last expression so the notebook displays the result.
token_surprisal_amb

In [None]:
# Visualise the token-level scores of the ambiguous sentence with the helper
# imported from helper_functions.py.
plot_surprisals(token_surprisal_amb)

In [None]:
# Token-level scores for the unambiguous control sentence, computed with the
# same settings as for the ambiguous sentence so the two are comparable.
# surprisal=True / prob=False request surprisal values instead of probabilities.
# bos_token=True and bow_correction=True are forwarded to minicons unchanged
# (presumably: prepend a beginning-of-sequence token and apply the
# beginning-of-word correction -- confirm against the minicons documentation).
token_surprisal_unamb = lmScorer.token_score(
    sent_garden_path_unamb, surprisal=True, prob=False,
    bos_token=True, bow_correction=True,
)

# Bare last expression so the notebook displays the result.
token_surprisal_unamb

In [None]:
# Visualise the token-level scores of the unambiguous sentence with the helper
# imported from helper_functions.py.
plot_surprisals(token_surprisal_unamb)

### Generate surprisal for words


#### Load `TweetTokenizer` from `nltk` library

In [None]:
from nltk.tokenize import TweetTokenizer
# Keep only the bound `tokenize` method: it is passed below as the
# `tokenize_function` that defines the word boundaries for word-level scoring.
word_tokenizer = TweetTokenizer().tokenize

In [None]:
# Word-level scores for the ambiguous garden-path sentence: words are delimited
# by `word_tokenizer`, and surprisal=True requests surprisal values.
# bos_token=True and bow_correction=True are forwarded to minicons unchanged
# (presumably: prepend a beginning-of-sequence token and apply the
# beginning-of-word correction -- confirm against the minicons documentation).
word_surprisal_amb = lmScorer.word_score_tokenized(
    sent_garden_path_amb, tokenize_function=word_tokenizer,
    surprisal=True, bos_token=True, bow_correction=True,
)

# Bare last expression so the notebook displays the result.
word_surprisal_amb

In [None]:
# Visualise the word-level scores of the ambiguous sentence.
# NOTE(review): ymax=20 presumably caps the y-axis of the plot -- confirm in helper_functions.py.
plot_surprisals(word_surprisal_amb, ymax=20)

<BR>
<BR>

### Exercise: Compare surprisal predictions for the following psycholinguistic effects
1. Object-Relative Clause (King & Just, 1991) → Harder than subject-relative clauses.
    - "The reporter that the senator attacked admitted the error."
    - "The reporter that attacked the senator admitted the error."

1. Negative Polarity Item Licensing (Xiang et al., 2009) → Complexity in licensing the negative polarity item “ever”, which requires a downward-entailing licensor such as “no” or “very few”.
    - Grammatical: {No/Very few} restaurants that the local newspapers have recommended in their dining reviews have ever gone out of business
    - Ungrammatical (intrusive licensor): The restaurants that {no/very few} local newspapers have recommended in their dining reviews have ever gone out of business
    - Ungrammatical (no licensor): Most restaurants that the local newspapers have recommended in their dining reviews have ever gone out of business

1. Pronoun Resolution Difficulty (Garnham, 2001) → Ambiguity in pronoun referent.
    - "John told Bill that he was going to win."
    - "John told Mary that he was going to win."

1. Center-Embedding (Miller & Chomsky, 1963) → Processing load increases with nested clauses.
    - "The rat the cat the dog chased killed ate the cheese."