# Exercise 2 – Number 1 (Updating ELIZA)

This section sets up a simple ELIZA-style chatbot using regular expressions.

In [16]:
# ELIZA setup: imports, history, reflection helper, and response rules
import re

# [BONUS]: Global set to store history of inputs for repetition detection
previous_inputs = set()

def reflect(fragment):
    """Reflect user input to make responses more natural."""
    reflections = {
        "am": "are",
        "was": "were",
        "i": "you",
        "i'd": "you would",
        "i've": "you have",
        "i'll": "you will",
        "my": "your",
        "are": "am",
        "you've": "I have",
        "you'll": "I will",
        "your": "my",
        "yours": "mine",
        "you": "me",
        "me": "you"
    }
    words = fragment.lower().split()
    return ' '.join([reflections.get(word, word) for word in words])

def eliza_response(user_input):
    """Generate ELIZA-style responses based on input."""

    # [BONUS] detect repeated questions
    normalized_input = user_input.strip().lower()
    if normalized_input in previous_inputs:
        return "Girl you already asked that! What more do you want?"
    previous_inputs.add(normalized_input)

    patterns = [
        # a. I want to know the reasons why I am feeling depressed all the time.
        (
            r"I\s*want\s+to\s+know(.*)",
            "It sounds like you really want to understand{0}. What do you think is behind that?"
        ),

        # b. I am feeling stressed.
        (
            r"I\s*am\s+feeling(.*)",
            "You’re feeling{0}. What has been contributing to those feelings?"
        ),

        # c. My feelings towards my crush are invalidated.
        (
            r"My feelings towards my crush are(.*)",
            "You feel your feelings for your crush are{0}. What happened that made you feel this way?"
        ),

        # d. You don’t understand me OR You do not understand me.
        (
            r"You\s+(?:don['’]t|do not)\s*(.*)",
            "You’re sensing that I don’t understand you{0}. What do you wish people understood better about you?"
        ),

        # e. I can’t focus on my studies OR I cannot focus on my studies.
        (
            r"I\s+(?:can['’]t\s+focus|cannot\s+focus\s+on)\s*(.*)",
            "Focusing on your{0} sounds really difficult right now. What usually gets in the way?"
        ),
    ]

    for pattern, response in patterns:
        match = re.match(pattern, user_input, re.IGNORECASE)
        if match:
            if match.groups():
                return response.format(reflect(match.group(match.lastindex)))
            else:
                return response

    return "Can you tell me more?"

In [17]:
# ELIZA main interaction loop
print("ELIZA: Hello! How can I help you today?")
while True:
    user_input = input("You: ")
    if user_input.lower() in ["quit", "exit"]:
        print("ELIZA: Goodbye!")
        break
    print(f"ELIZA: {eliza_response(user_input)}")

ELIZA: Hello! How can I help you today?
ELIZA: It sounds like you really want to understandwhy. What do you think is behind that?
ELIZA: Can you tell me more?
ELIZA: Girl you already asked that! What more do you want?
ELIZA: Girl you already asked that! What more do you want?
ELIZA: Girl you already asked that! What more do you want?
ELIZA: Girl you already asked that! What more do you want?
ELIZA: Girl you already asked that! What more do you want?
ELIZA: Girl you already asked that! What more do you want?


KeyboardInterrupt: Interrupted by user

# Exercise 2 – Number 2 (Regular Expressions in NLP)





In this part of the notebook, we will update ELIZA by implementing RegEx on NLP:



1. From a short *Alice* passage, pull out all words that start with a capital letter (e.g., names, sentence starts).



2. From the file *melville-moby_dick.txt*, find every occurrence of the word **whale** in the text.



3. Using NLTK’s `webtext` corpus (`pirates.txt`), collect all the lines spoken by **Jack Sparrow**.

In [None]:
# All imports
import re
import nltk
from nltk.corpus import webtext

In [None]:
# Part (a): Find all words that start with a capital letter in the Alice passage

text_a = (
    """Alice was beginning to get very tired of sitting by her sister on the bank,
    and of having nothing to do.  Once or twice she had peeped into the book her sister 
    was reading, but it had no pictures or conversations in it, \"and what is the use of 
    a book,\" thought Alice, \"without pictures or conversations?"""
)

# RegEx pattern: match any whole word that begins with an uppercase letter

pattern_a = r"\b[A-Z][a-zA-Z]*\b"

capitalized_words = re.findall(pattern_a, text_a)

capitalized_words

['Alice', 'Once', 'Alice']

In [None]:
# Part (b): Read Moby Dick and pull out every occurrence of the word 'whale' or 'whales'


with open("melville-moby_dick.txt", "r", encoding="utf-8") as f:

    moby_text = f.read()



# RegEx pattern: match the whole word 'whale' or 'whales', ignoring case (whale, Whale, WHALES, etc.)

pattern_b = r"\bwhales?\b"



whale_matches = re.findall(pattern_b, moby_text, flags=re.IGNORECASE)



# Show how many times it appears and preview the first 20 matches

len(whale_matches), whale_matches[:20]

(1500,
 ['Whale',
  'Whale',
  'WHALE',
  'WHALE',
  'Whale',
  'Whales',
  'Whales',
  'Whales',
  'Whale',
  'Whale',
  'Whale',
  'Whale',
  'Whale',
  'Whale',
  'Whale',
  'Whale',
  'Whale',
  'Whale',
  'whale',
  'WHALE'])

In [None]:
# Part (c): Use the NLTK pirates corpus to grab Jack Sparrow's spoken lines

# Download the webtext corpus first
nltk.download("webtext")

pirates_text = webtext.raw("pirates.txt")

# RegEx pattern: find lines that start with 'JACK:' or 'JACK SPARROW:'
# and capture the actual line he speaks after the colon

pattern_c = r"^JACK(?: SPARROW)?:\s*(.*)$"

jack_lines = re.findall(pattern_c, pirates_text, flags=re.MULTILINE)

# Show how many lines Jack has and preview the first 20

len(jack_lines), jack_lines[:20]

[nltk_data] Downloading package webtext to
[nltk_data]     C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data]   Package webtext is already up-to-date!


(193,
 ['Sorry, mate.',
  "Mind if we make a little side trip? I didn't think so.",
  'Complications arose, ensued, were overcome.',
  'Mm-hmm!',
  'Shiny?',
  "Is that how you're all feeling, then? Perhaps dear old Jack is not serving your best interests as captain?",
  'What did the bird say?',
  'Ohhh!',
  'It does me.',
  'No! Much more better. It is a *drawing* of a key. ',
  'Gentlemen, what do keys do?',
  "No! If we don't have the key, we can't open whatever it is we don't have that it unlocks. So what purpose would be served in finding whatever need be unlocked, which we don't have, without first having found the key what unlocks it?",
  "You're not making any sense at all. Any more questions?",
  'Hah! A heading. Set sail in a... mmm... a general... in *that* way - direction. ',
  'Come on, snap to and make sail, you know how this works. Come on, oy/quick, oy/quick, hey!',
  'Why is the rum always gone?',
  "Oh! *That's* why.",
  'As you were, gents.',
  'Ah!',
  'Bootstrap. 