# CCS-249 Exercise Unit 2: ELIZA and RegEx NLP

## Exercise 1: Updating ELIZA
Builds a chatbot that recognizes patterns using regex and responds to specific questions. Includes 5 emotional patterns and sarcastic responses for repeated questions.

In [None]:
# 1. Updating ELIZA
import re
import random

def reflect(fragment):
    """Swap first- and second-person words so input can be echoed back.

    Each whitespace-separated token is looked up case-insensitively in a
    small pronoun/verb table. Punctuation attached to either end of a token
    is set aside for the lookup and re-attached afterwards, so "me." becomes
    "you." instead of being left unreflected.

    Args:
        fragment: Text captured from the user's sentence.

    Returns:
        The fragment with reflected words; tokens are joined by single
        spaces, so runs of whitespace in the input are collapsed.
    """
    reflections = {
        "am": "are",
        "was": "were",
        "i": "you",
        "i'd": "you would",
        "i've": "you have",
        "i'll": "you will",
        "i'm": "you are",
        "my": "your",
        "are": "am",
        "you've": "I have",
        "you'll": "I will",
        "your": "my",
        "yours": "mine",
        "you": "me",
        "me": "you",
    }

    def swap(token):
        # Separate edge punctuation from the word; inner apostrophes
        # (as in "i'd") stay part of the word because the middle group
        # only stops where everything remaining is non-word characters.
        lead, core, trail = re.fullmatch(r"(\W*)(.*?)(\W*)", token).groups()
        return lead + reflections.get(core.lower(), core) + trail

    return ' '.join(swap(token) for token in fragment.split())

def eliza_response(user_input):
    """Generate an ELIZA reply by matching the input against known patterns.

    Patterns are tried in order with a case-insensitive match anchored at
    the start of the input. Templates containing ``{0}`` receive the
    captured fragment after it has been passed through ``reflect``; the
    other templates are fixed replies.

    Args:
        user_input: The sentence typed by the user.

    Returns:
        The reply for the first matching pattern, or the generic prompt
        "Tell me more about that." when nothing matches.
    """
    # ['\u2019] accepts both the straight and the typographic apostrophe.
    # Alternations are non-capturing so that only the open-ended (.*)
    # patterns expose a group to be reflected.
    patterns = [
        (r"I need (.*)", "Why do you need {0}?"),
        (r"Why don['\u2019]t you (.*)", "Do you really think I don't {0}?"),
        (r"I feel (.*)", "Tell me more about feeling {0}."),
        (r"I want to know the reasons why I am feeling depressed all the time\.?",
         "Why do you want to know the reasons for feeling depressed all the time?"),
        (r"I am feeling stressed\.?", "Tell me more about why you're feeling stressed."),
        (r"My feelings towards my crush are invalidated\.?",
         "Why do you think your feelings towards your crush are invalidated?"),
        (r"You (?:don['\u2019]t|do not) understand me\.?",
         "Why do you think I don't understand you?"),
        (r"I (?:can['\u2019]t|cannot) focus on my studies\.?",
         "What is making it difficult for you to focus on your studies?"),
    ]

    for pattern, response in patterns:
        match = re.match(pattern, user_input, re.IGNORECASE)
        if not match:
            continue
        if not match.groups():
            return response
        # Drop the sentence-ending punctuation so the template's own
        # punctuation is not doubled ("...a break.?").
        fragment = match.group(1).rstrip(" \t.!?,;:")
        if fragment:
            return response.format(reflect(fragment))
        # Nothing meaningful was captured; keep looking, then fall back.
    return "Tell me more about that."

# Inputs already answered, in the order they were asked.
previous_questions = []
# Lower-cased copies of previous_questions, kept for constant-time repeat
# detection instead of rebuilding a lower-cased list on every turn.
_asked_questions = set()
# Canned replies used when the user repeats an earlier input.
sarcasm = [
    "We've been through this already. Try something new.",
    "Seriously? Again? I thought we moved on.",
    "Oh wow, you're stuck on that? Let's call it a day.",
    "Asking me the same thing twice won't change anything.",
    "What, did you forget I already answered that?",
    "Congratulations, you've discovered the loop function.",
    "I'm not a broken record, even if you're testing me."
]

print("ELIZA: Hello! How can I help you today?")
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the session was interrupted: end the chat
        # the same way "quit" does instead of dying with a traceback.
        print("\nELIZA: Goodbye!")
        break

    normalized = user_input.lower()

    if normalized in ("quit", "exit"):
        print("ELIZA: Goodbye!")
        break

    # Ignore empty lines and prompt again.
    if not user_input:
        continue

    if normalized in _asked_questions:
        # Repeated input (case-insensitive): answer with a random quip.
        print(f"ELIZA: {random.choice(sarcasm)}\n")
    else:
        response = eliza_response(user_input)
        print(f"ELIZA: {response}\n")
        previous_questions.append(user_input)
        _asked_questions.add(normalized)

## Exercise 2: Implementing RegEx on NLP
Applies regex patterns to extract and process text data.

### Part A: Extract Capitalized Words
Use regex to find all words that start with a capital letter in the text.

In [None]:
# Part A: Extract words starting with uppercase
import re

# Section banner for Part A.
print("=" * 50, "PART A: Extract Capitalized Words", "=" * 50, sep="\n")

# Sample passage, one source line per literal.
text = (
    'Alice was beginning to get very tired of sitting by her sister on the bank,\n'
    'and of having nothing to do. Once or twice she had peeped into the book\n'
    'her sister was reading, but it had no pictures or conversations in it, "and\n'
    'what is the use of a book," thought Alice, "without pictures or conversations?'
)

# A word boundary, one uppercase ASCII letter, then any further word characters.
pattern = r'\b[A-Z]\w*'
words = [hit.group() for hit in re.finditer(pattern, text)]

print("Pattern: " + pattern)
print("Capitalized words: {}\n".format(words))

### Part B: Extract and Replace from Literary Text
Read the Moby Dick text file and replace the first 10 occurrences of "Whale"/"whale" (singular or plural) with "leviathan".

In [None]:
# Part B: Extract and replace Whale/whales in Moby Dick
print("=" * 50)
print("PART B: Extract and Replace Whale/Whales")
print("=" * 50)

# Only the file read can raise FileNotFoundError, so it is the only
# statement guarded by the try; the processing runs in the else branch.
try:
    with open(r'c:\Users\DELL\Desktop\CCS-249_25-26_Activities\BSCS 3A\KYLA_ELIJAH_RAMIRO\melville-moby_dick.txt', 'r', encoding='utf-8') as f:
        text = f.read()
except FileNotFoundError:
    print("Error: melville-moby_dick.txt not found\n")
else:
    # Whole-word "whale"/"whales", capitalized or lowercase.
    pattern = r'\b(Whale|Whales|whale|whales)\b'
    matches = re.findall(pattern, text)

    print(f"Pattern: {pattern}")
    print(f"Total matches: {len(matches)}")
    print(f"First 10: {matches[:10]}\n")

    # count=10 makes re.sub stop after the first ten matches, so no
    # hand-written counter callback is needed.
    modified = re.sub(pattern, "leviathan", text, count=10)
    print("First 10 instances replaced with 'leviathan'\n")

### Part C: Extract Character Dialogue from NLTK Corpus
Use NLTK to load the pirates.txt file and extract all lines spoken by Jack Sparrow.

In [None]:
# Part C: Extract Jack Sparrow lines from NLTK Pirates corpus
print("=" * 50)
print("PART C: Extract Jack Sparrow Dialogue")
print("=" * 50)

try:
    import nltk
    from nltk.corpus import webtext

    # Fetch the corpus if it is not already installed locally.
    nltk.download('webtext', quiet=True)

    text = webtext.raw('pirates.txt')
    # Capture everything after "JACK SPARROW:" up to (not including) the next
    # newline that starts a run of letters/whitespace ending in a colon, or
    # the end of the text. DOTALL lets the capture span several lines.
    pattern = r'JACK SPARROW:\s*(.+?)(?=\n[A-Z\s]+:|$)'

    lines = re.findall(pattern, text, re.IGNORECASE | re.DOTALL)

    print(f"Pattern: {pattern}")
    print(f"Total Jack Sparrow lines: {len(lines)}\n")
    print("First 5 lines:")
    for i, line in enumerate(lines[:5], 1):
        flat = line.strip().replace('\n', ' ')
        # Mark the preview with an ellipsis only when text was actually cut.
        preview = (flat[:80] + "...") if len(flat) > 80 else flat
        print(f"{i}. {preview}\n")

except ImportError:
    print("Error: NLTK not installed. Run: pip install nltk")
except LookupError:
    # Raised by the corpus loader when the data is missing, e.g. because
    # the download above could not complete.
    print("Error: NLTK 'webtext' corpus not available. Run: nltk.download('webtext')")