# 1. Updating ELIZA

In [None]:
# ELIZA implementation in Python
# Example Generated via ChatGPT
import re

def reflect(fragment):
    """Swap first- and second-person words so a fragment can be echoed back.

    The fragment is lower-cased and split on whitespace. Each token is looked
    up in the reflection table with any leading/trailing punctuation set
    aside, so tokens such as "me." or "my," are still reflected; the
    punctuation is re-attached afterwards. Tokens without a table entry pass
    through unchanged (lower-cased).

    Args:
        fragment: Text captured from the user's input.

    Returns:
        The reflected tokens joined by single spaces. Replacement values keep
        their own casing (e.g. "I have").
    """
    reflections = {
        "am": "are",
        "was": "were",
        "i": "you",
        "i'm": "you are",
        "i'd": "you would",
        "i've": "you have",
        "i'll": "you will",
        "my": "your",
        "mine": "yours",
        "myself": "yourself",
        "are": "am",
        "you're": "I am",
        "you'd": "I would",
        "you've": "I have",
        "you'll": "I will",
        "your": "my",
        "yours": "mine",
        "yourself": "myself",
        "you": "me",
        "me": "you"
    }
    reflected = []
    for token in fragment.lower().split():
        # Peel off surrounding punctuation only; an apostrophe inside a word
        # ("i'd") stays in the core so contractions still hit the table.
        lead, core, trail = re.match(r"^(\W*)(.*?)(\W*)$", token).groups()
        reflected.append(lead + reflections.get(core, core) + trail)
    return ' '.join(reflected)

def eliza_response(user_input):
    """Return an ELIZA-style reply for one line of user input.

    The input is stripped of surrounding whitespace and tested against the
    patterns below in order (case-insensitively, anchored at the start of the
    input); the first pattern that matches decides the reply. Patterns with a
    capture group echo the captured text back after passing it through
    ``reflect``; patterns without one return a fixed reply.

    Args:
        user_input: The raw line typed by the user.

    Returns:
        The reply string, or "Can you tell me more?" when nothing matches or
        when a capturing pattern captured nothing but punctuation/whitespace.
    """
    patterns = [
        (r"I want to know the reasons why I am feeling depressed all the time\.?",
         "That sounds heavy. What do you think might be contributing to feeling depressed all the time?"),
        (r"I am feeling stressed\.?",
         "Stress can be overwhelming. What is stressing you out right now?"),
        (r"My feelings towards my crush are invalidated\.?",
         "It hurts to feel invalidated. How do you wish they would respond to your feelings?"),
        (r"You (?:don't|do not) understand me\.?",
         "It seems you feel misunderstood. What do you want me to understand better?"),
        (r"I (?:can't|cannot) focus on my studies\.?",
         "Concentration is hard sometimes. What usually distracts you from your studies?"),
        (r"I need (.*)", "Why do you need {0}?"),
        (r"Why don't you (.*)", "Do you really think I don't {0}?"),
        (r"I feel (.*)", "Tell me more about feeling {0}.")
    ]

    # re.match only matches at the start of the string, so leading whitespace
    # would otherwise defeat every pattern.
    cleaned_input = user_input.strip()

    for pattern, response in patterns:
        match = re.match(pattern, cleaned_input, re.IGNORECASE)
        if not match:
            continue
        if not match.lastindex:
            # No capture group in this pattern: the reply is fixed text.
            return response
        # The templates supply their own closing "?" or ".", so drop trailing
        # sentence punctuation from the capture to avoid output like "help.?".
        fragment = match.group(1).strip().rstrip(".!?,;:").rstrip()
        if fragment:
            return response.format(reflect(fragment))
        # Nothing meaningful was captured; keep looking and eventually fall
        # back to the generic prompt below.

    return "Can you tell me more?"

# Interactive chat loop: greet, then answer each line until the user quits.
print("ELIZA: Hello! How can I help you today?")
# Normalised form of the previous turn, used to detect an immediate repeat.
last_user_input = None
# Replies for repeated input; the list is rotated so consecutive repeats get
# different lines.
sarcastic_replies = [
    "You already asked that. Hoping for a different answer this time?",
    "Same question again? Interesting strategy.",
    "We just covered that. Any new thoughts?"
]
while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # No more input (closed stdin or an interrupted kernel): end the chat
        # cleanly instead of finishing the cell with a traceback.
        print("\nELIZA: Goodbye!")
        break
    # Echo the turn so it appears in the transcript (presumably because the
    # notebook front end does not keep the typed text in the cell output).
    print(f"You: {user_input}")
    # Lower-case, trim and collapse whitespace so that "Quit ", " exit" and
    # differently-spaced repeats are recognised.
    normalized_input = re.sub(r"\s+", " ", user_input.strip().lower())
    if normalized_input in ("quit", "exit"):
        print("ELIZA: Goodbye!")
        break
    if normalized_input == last_user_input:
        print(f"ELIZA: {sarcastic_replies[0]}")
        # Move the reply just used to the back of the queue.
        sarcastic_replies.append(sarcastic_replies.pop(0))
        continue
    last_user_input = normalized_input
    print(f"ELIZA: {eliza_response(user_input)}")

ELIZA: Hello! How can I help you today?
You: I want to know the reasons why I am feeling depressed all the time.
ELIZA: That sounds heavy. What do you think might be contributing to feeling depressed all the time?
You: I don't know
ELIZA: Can you tell me more?
You: I am feeling stressed.
ELIZA: Stress can be overwhelming. What is stressing you out right now?
You: I am feeling stressed.
ELIZA: You already asked that. Hoping for a different answer this time?
You: I am feeling stressed.
ELIZA: Same question again? Interesting strategy.
You: 
ELIZA: Can you tell me more?


# 2. Implementing RegEx on NLP

## a.

In [1]:
# Implementing RegEx on NLP - Preprocessing
import re

# Sample passage to search.
text = (
    "Alice was beginning to get very tired of sitting by her sister on the bank, "
    "and of having nothing to do. Once or twice she had peeped into the book her "
    "sister was reading, but it had no pictures or conversations in it, "
    '"and what is the use of a book," thought Alice, "without pictures or conversations?"'
)

# A whole word made of ASCII letters whose first letter is uppercase:
# \b marks the word edges, [A-Z] is the capital, [a-zA-Z]* the remainder.
pattern = r"\b[A-Z][a-zA-Z]*\b"

# Collect every non-overlapping match, in order of appearance.
matches = [found.group() for found in re.finditer(pattern, text)]
print(f"RegEx pattern: {pattern}")
print(f"Matches: {matches}")


RegEx pattern: \b[A-Z][a-zA-Z]*\b
Matches: ['Alice', 'Once', 'Alice']


## b.

In [5]:
# RegEx: find Whale/Whales/whale/whales and replace ALL with "leviathan"
import re

import os

# Folder that holds the Moby Dick text. It defaults to the folder this notebook
# was written against; set the MOBY_DICK_DIR environment variable to run the
# cell on another machine without editing the code.
data_dir = os.environ.get(
    "MOBY_DICK_DIR",
    r"C:\Users\ASUS\Desktop\SecondSem\CCS-249_25-26_Activities\BSCS 3A\JANFLOYD_VALLOTA",
)
file_path = os.path.join(data_dir, "melville-moby_dick.txt")
output_path = os.path.join(data_dir, "melville-moby_dick_leviathan.txt")

with open(file_path, "r", encoding="utf-8") as f:
    text = f.read()

# "whale" or "whales" as a whole word; case is ignored through re.IGNORECASE
# at each call site, so Whale/WHALE/whales are all covered.
pattern = r"\bwhales?\b"

matches = re.findall(pattern, text, flags=re.IGNORECASE)
print("RegEx pattern:", pattern)
print("Total matches:", len(matches))

# Replace ALL instances (case-insensitive) and write to a new file
replaced_text = re.sub(pattern, "leviathan", text, flags=re.IGNORECASE)
with open(output_path, "w", encoding="utf-8") as f:
    f.write(replaced_text)

print("\nSaved replaced text to:", output_path)

# Show the first 10 instances AFTER replacement (sentences with "leviathan").
# Sentences are approximated by splitting on whitespace that follows ".", "!"
# or "?", so text without such punctuation stays together as one "sentence".
replaced_sentences = re.split(r"(?<=[.!?])\s+", replaced_text)
shown = 0
print("\nFirst 10 sentences containing leviathan:\n")
for sentence in replaced_sentences:
    if re.search(r"\bleviathan\b", sentence, flags=re.IGNORECASE):
        shown += 1
        print(f"{shown}. {sentence.strip()}")
        if shown == 10:
            break


RegEx pattern: \bwhales?\b
Total matches: 1500

Saved replaced text to: C:\Users\ASUS\Desktop\SecondSem\CCS-249_25-26_Activities\BSCS 3A\JANFLOYD_VALLOTA\melville-moby_dick_leviathan.txt

First 10 sentences containing leviathan:

1. The Project Gutenberg eBook of Moby Dick; Or, The leviathan
    
This ebook is for the use of anyone anywhere in the United States and
most other parts of the world at no cost and with almost no restrictions
whatsoever.
2. Title: Moby Dick; Or, The leviathan

Author: Herman Melville

Release date: July 1, 2001 [eBook #2701]
                Most recently updated: January 19, 2025

Language: English

Credits: Daniel Lazarus, Jonesey, and David Widger


*** START OF THE PROJECT GUTENBERG EBOOK MOBY DICK; OR, THE leviathan ***




MOBY-DICK;

or, THE leviathan.
3. The Whiteness of the leviathan.
4. Of the Monstrous Pictures of leviathan.
5. Of the Less Erroneous Pictures of leviathan, and the True
Pictures of Whaling Scenes.
6. Of leviathan in Paint; in Teeth; 

## c.

In [2]:
# NLTK webtext: extract lines spoken by Jack Sparrow
import re
import nltk
from nltk.corpus import webtext

# Make sure the NLTK "webtext" corpus is available before reading from it.
nltk.download("webtext", quiet=True)

# Whole script as one string.
pirates_text = webtext.raw("pirates.txt")

# With re.MULTILINE, ^ anchors at the start of every line, so this picks up
# each line that begins with "JACK", through to the end of that line.
pattern = r"^JACK.*"

jack_lines = [
    hit.group()
    for hit in re.finditer(pattern, pirates_text, flags=re.MULTILINE)
]
print(f"RegEx pattern: {pattern}")
print(f"Total Jack Sparrow lines: {len(jack_lines)}")
print("All Jack Sparrow lines:")
for spoken_line in jack_lines:
    print(spoken_line)


RegEx pattern: ^JACK.*
Total Jack Sparrow lines: 193
All Jack Sparrow lines:
JACK SPARROW: Sorry, mate.
JACK SPARROW: Mind if we make a little side trip? I didn't think so.
JACK SPARROW: Complications arose, ensued, were overcome.
JACK SPARROW: Mm-hmm!
JACK SPARROW: Shiny?
JACK SPARROW: Is that how you're all feeling, then? Perhaps dear old Jack is not serving your best interests as captain?
JACK SPARROW: What did the bird say?
JACK SPARROW: Ohhh!
JACK SPARROW: It does me.
JACK SPARROW: No! Much more better. It is a *drawing* of a key. 
JACK SPARROW: Gentlemen, what do keys do?
JACK SPARROW: No! If we don't have the key, we can't open whatever it is we don't have that it unlocks. So what purpose would be served in finding whatever need be unlocked, which we don't have, without first having found the key what unlocks it?
JACK SPARROW: You're not making any sense at all. Any more questions?
JACK SPARROW: Hah! A heading. Set sail in a... mmm... a general... in *that* way - direction. 
JAC