### Updating ELIZA (With Bonus)

In [None]:
import re

def reflect(fragment):
    """Swap first- and second-person words so a fragment can be echoed back.

    The fragment is lower-cased and split on whitespace. Each token is then
    looked up without its leading/trailing punctuation (which is re-attached
    afterwards) and with typographic apostrophes treated like plain ones, so
    "me." and "I’m" are reflected just like "me" and "i'm".

    Verb agreement is context-sensitive: "are" only becomes "am" (and the
    neighbouring "you" becomes "I") when the two words are adjacent, and
    "was" only becomes "were" next to "i". Third-person phrases such as
    "my feelings are hurt" therefore keep their verb.

    Args:
        fragment: Text captured from the user's sentence.

    Returns:
        The reflected fragment as a single space-joined, lower-cased string
        (apart from the pronoun "I", which is capitalised).
    """
    # Context-free swaps. "are" and "was" are handled separately below
    # because they depend on the neighbouring pronoun.
    reflections = {
        "am": "are",
        "i": "you",
        "i'd": "you would",
        "i've": "you have",
        "i'll": "you will",
        "i'm": "you are",
        "my": "your",
        "you've": "I have",
        "you'll": "I will",
        "you're": "I am",
        "your": "my",
        "yours": "mine",
        "you": "me",
        "me": "you",
    }
    edge_punctuation = ".,;:!?\"'()[]{}…‘’“”"

    # Split every token into (leading punctuation, core word, trailing
    # punctuation) so the core can be looked up on its own.
    parts = []
    for token in fragment.lower().split():
        core = token.strip(edge_punctuation)
        start = token.find(core) if core else len(token)
        parts.append((token[:start], core, token[start + len(core):]))

    # Lookup keys use the plain apostrophe; unmatched words keep their
    # original spelling (including any typographic apostrophe).
    keys = [core.replace("’", "'") for _, core, _ in parts]

    reflected = []
    for index, (lead, core, trail) in enumerate(parts):
        key = keys[index]
        neighbours = (
            keys[index - 1] if index else "",
            keys[index + 1] if index + 1 < len(keys) else "",
        )
        if key == "are":
            swapped = "am" if "you" in neighbours else core
        elif key == "was":
            swapped = "were" if "i" in neighbours else core
        elif key == "you" and "are" in neighbours:
            # Subject position ("you are" / "are you") needs "I", not "me".
            swapped = "I"
        else:
            swapped = reflections.get(key, core)
        reflected.append(lead + swapped + trail)

    return " ".join(reflected)

def eliza_response(user_input):
    """Generate an ELIZA-style reply for one line of user input.

    The input is matched (case-insensitively, from its first non-blank
    character) against an ordered list of patterns; the first pattern that
    matches wins. The last captured group is stripped of trailing sentence
    punctuation, passed through ``reflect`` and inserted into the reply
    template.

    Args:
        user_input: The raw sentence typed by the user.

    Returns:
        The reply text, or "Can you tell me more?" when no pattern matches.
    """
    # Contractions accept both the keyboard apostrophe (') and the
    # typographic one (’), since typed input normally contains the former.
    patterns = [
        # "I want to know the reasons why I am feeling depressed all the time."
        (r"I want to know (.*)", "Why is it important for you to know {0}?"),

        # "I am feeling stressed."
        (r"I am feeling (.*)", "How long have you been feeling {0}?"),

        # "My feelings towards my crush are invalidated."
        (r"My feelings (.*)", "Why do you feel that your feelings {0}?"),

        # "You don't understand me" / "You do not understand me."
        # The captured negation itself is echoed back in the reply.
        (r"You (don['’]t|do not) understand me", "What makes you think I {0} understand you? I am trying my best."),

        # "I can't focus on my studies" / "I cannot focus on my studies."
        # The negation is non-capturing so the only group is the content;
        # the content is optional so a bare "I can't focus" also matches.
        (r"I (?:can['’]t|cannot) focus\b\s*(.*)", "What is distracting you from focusing {0}?"),

        (r"I need (.*)", "Why do you need {0}?"),
        (r"Why don['’]t you (.*)", "Do you really think I don't {0}?"),
        (r"I feel (.*)", "Tell me more about feeling {0}."),
    ]

    # re.match anchors at the start, so leading blanks would defeat every rule.
    sentence = user_input.strip()

    for pattern, response in patterns:
        match = re.match(pattern, sentence, re.IGNORECASE)
        if match is None:
            continue

        groups = match.groups()
        captured_text = (groups[-1] if groups else "") or ""
        # The templates supply their own end punctuation; without this the
        # user's final "." would produce replies ending in ".?".
        captured_text = captured_text.rstrip(" \t.!?,;:")

        reply = response.format(reflect(captured_text))
        # An empty capture leaves a blank before the final punctuation mark.
        return re.sub(r"\s+(?=[?.!]$)", "", reply)

    return "Can you tell me more?"

print("ELIZA: Hello! How can I help you today?")

# BONUS: remember the previous message so an immediate repeat can be called
# out. None (rather than "") means "nothing said yet", so a blank first
# message is not mistaken for a repetition.
previous_input = None

while True:
    try:
        # Surrounding blanks are dropped so " quit" still ends the session
        # and "hello " counts as a repeat of "hello".
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupt: end the chat instead of a traceback.
        print("\nELIZA: Goodbye!")
        break

    normalized = user_input.lower()

    if normalized in ("quit", "exit"):
        print("ELIZA: Goodbye!")
        break

    # --- BONUS: REPETITION CHECK (case-insensitive) ---
    if previous_input is not None and normalized == previous_input.lower():
        print("ELIZA: You just said that. Are you stuck in a loop?")
        continue  # Ask for new input without generating a reply

    # Update history
    previous_input = user_input

    print(f"ELIZA: {eliza_response(user_input)}")

### Extract all of the words starting with an upper case letter from the text:

In [None]:
import re

text = 'Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do.  Once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, "and what is the use of a book," thought Alice, "without pictures or conversations?'

# A word boundary, one ASCII capital letter, then any run of word characters.
pattern = r'\b[A-Z]\w*'

# Walk the matches left to right and keep the matched text of each one.
matches = [found.group() for found in re.finditer(pattern, text)]

print(matches)

In [None]:
import re

filename = 'melville-moby_dick.txt'

try:
    with open(filename, 'r', encoding='utf-8') as source:
        content = source.read()

    # Matches exactly four spellings: Whale, Whales, whale, whales.
    pattern = r"\b[Ww]hales?\b"
    whale_re = re.compile(pattern)

    # Step 1: collect every occurrence and report how many there are.
    matches = whale_re.findall(content)
    print(f"Found {len(matches)} instances of the target words.")

    # Step 2: swap in "leviathan"; count=10 stops after the tenth
    # replacement, leaving later occurrences untouched.
    modified_content = whale_re.sub("leviathan", content, count=10)

    print("Replacement complete. The first 10 instances have been changed to 'leviathan'.")

    # Save the edited text next to the original so the result can be inspected.
    with open('melville-moby_dick_modified.txt', 'w', encoding='utf-8') as target:
        target.write(modified_content)

except FileNotFoundError:
    print(f"Error: The file '{filename}' was not found.")

In [None]:
import nltk
import re
from nltk.corpus import webtext

# Step 1: make sure the 'webtext' corpus is available locally.
nltk.download('webtext')

# Step 2: read pirates.txt from the corpus as one string.
pirates_content = webtext.raw('pirates.txt')

# A line that begins with the speaker label "JACK SPARROW:"; the group
# captures whatever follows the label.
# NOTE(review): \s* also matches line breaks, so a label with nothing after
# it on the same line would capture the following line — confirm against
# the corpus layout.
pattern = r"^JACK SPARROW:\s*(.*)"

# Step 3: MULTILINE makes '^' anchor at the start of every line rather than
# only at the start of the whole string.
jack_line_re = re.compile(pattern, flags=re.MULTILINE)
jack_lines = jack_line_re.findall(pirates_content)

# Report the total, then show a short sample.
print(f"Found {len(jack_lines)} lines spoken by Jack Sparrow.\n")

print("--- First 5 Lines ---")
for spoken in jack_lines[:5]:
    print(spoken)