In [None]:
# !pip install bertopic
# !pip install umap-learn # Install the correct umap package: umap-learn

In [None]:
# Necessary Imports
import html
import json
import os
import random

import pandas as pd
import requests
import umap
from bertopic import BERTopic
from google.colab import userdata
from hdbscan import HDBSCAN
from sklearn.feature_extraction.text import CountVectorizer
from umap.umap_ import UMAP

In [None]:
# Mount Google Drive so files under /content/drive (such as the dataset CSV
# read later in this notebook) are accessible.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# --- 1. Load Sentiment140 Dataset ---

# Location of the raw CSV on the mounted shared drive; adjust if needed.
SENTIMENT140_CSV = "/content/drive/Shareddrives/MIS 769/sentiment140.csv"
SENTIMENT140_COLUMNS = ['target', 'id', 'date', 'flag', 'user', 'text']

# The file is read without a header row, so column names are assigned afterwards.
sentiment_data = pd.read_csv(SENTIMENT140_CSV, encoding='latin-1', header=None)
sentiment_data.columns = SENTIMENT140_COLUMNS

# Keep only the text of the rows whose target equals 4 (treated as positive).
is_positive = sentiment_data['target'] == 4
positive_tweets = sentiment_data.loc[is_positive, 'text']

In [None]:
# --- 2. Search Positive Tweets by Brand ---
brands = ["nike", "starbucks", "netflix", "chipotle", "xbox", "tesla", "target"]

# Map each brand to the number of positive tweets that mention it
# (case-insensitive; missing text counts as "no match").
brand_counts = {
    brand: int(positive_tweets.str.contains(brand, case=False, na=False).sum())
    for brand in brands
}

print(brand_counts)

{'nike': 172, 'starbucks': 838, 'netflix': 114, 'chipotle': 92, 'xbox': 493, 'tesla': 19, 'target': 348}


In [None]:
# --- 3. Select a Brand (currently: Starbucks) ---
# Change `selected_brand` to analyze the positive tweets of a different brand.
selected_brand = "starbucks"
# Case-insensitive match on the brand name; missing text is treated as no match.
selected_brand_tweets = positive_tweets[positive_tweets.str.contains(selected_brand, case=False, na=False)]

In [None]:
# --- 4. Text Preprocessing ---
import re
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import nltk
# Fetch the NLTK stopword corpus that stopwords.words('english') reads below.
nltk.download('stopwords')

# Shared by preprocess_tweet: a Porter stemmer and the English stopword set.
stemmer = PorterStemmer()
stop_words = set(stopwords.words('english'))

def preprocess_tweet(tweet):
    """Normalize one tweet into a string of lowercase, stemmed, non-stopword tokens.

    Steps, in order: decode HTML entities, remove URLs, remove @mentions and
    '#' characters, remove every character that is not an ASCII letter or
    whitespace, lowercase, split on whitespace, drop tokens found in
    ``stop_words`` and stem the rest with ``stemmer``.

    Args:
        tweet: Raw tweet text (a ``str``).

    Returns:
        The surviving tokens joined by single spaces; an empty string if no
        token survives.
    """
    # Decode entities first: otherwise "&amp;" loses its punctuation in the
    # letter-only filter below and leaks into the corpus as the junk token "amp".
    tweet = html.unescape(tweet)
    # "http\S+" already matches https links, so no separate alternative is needed.
    tweet = re.sub(r'http\S+|www\S+', '', tweet)
    tweet = re.sub(r'@\w+|#', '', tweet)
    tweet = re.sub(r'[^a-zA-Z\s]', '', tweet)
    tweet_tokens = tweet.lower().split()
    filtered_words = [stemmer.stem(w) for w in tweet_tokens if w not in stop_words]
    return " ".join(filtered_words)

# Clean every tweet of the selected brand; the result keeps the original index.
preprocessed_tweets = selected_brand_tweets.apply(preprocess_tweet)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
## MIGHT NEED TO RUN THIS LINE BEFORE YOU CAN CONTINUE##
#!pip install bertopic

In [None]:
# --- 5. Topic Modeling with BERTopic ---

# UMAP is stochastic, so without a fixed random_state the discovered topics
# change on every run. The other arguments mirror the UMAP settings BERTopic
# builds by default, so only the seeding differs from a plain BERTopic().
umap_model = UMAP(n_neighbors=15, n_components=5, min_dist=0.0, metric='cosine', random_state=42)
model = BERTopic(umap_model=umap_model)
topics, probs = model.fit_transform(preprocessed_tweets.tolist())

In [None]:
# --- 6. Explore Topics ---
# Summary table of the fitted topics; head() displays its first five rows.
topics_overview = model.get_topic_info()
topics_overview.head()


Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,394,-1_starbuck_go_day_im,"[starbuck, go, day, im, lol, love, get, got, good, like]","[starbuck one time good, would love starbuck right lol, go starbuck]"
1,0,156,0_coffe_drink_starbuck_mocha,"[coffe, drink, starbuck, mocha, ice, caramel, tea, frap, enjoy, chocol]","[starbuck coffe white chocol mocha cours, drink starbuck, need starbuck coffe]"
2,1,54,1_starbuck_that_way_someth,"[starbuck, that, way, someth, found, new, thing, first, eavesdrop, etil]","[starbuck, starbuck, starbuck]"
3,2,51,2_day_start_woke_starbuck,"[day, start, woke, starbuck, got, morn, saturday, great, earli, sleep]","[great day well, woke late got readi made bed got starbuck still got work two min earli love live, got day today still woke earli plan go enjoy coffe starbuck soon casual life favorit]"
4,3,29,3_work_go_get_starbuck,"[work, go, get, starbuck, til, ism, paper, desk, project, she]","[work starbuck, starbuck work, work starbuck]"


In [None]:

# --- 7. Reduce and Visualize Topics ---
# Reduce to 20 topics. The target count is passed explicitly instead of relying
# on the library default, and the documents are given as a plain list, matching
# what fit_transform received.
model.reduce_topics(preprocessed_tweets.tolist(), nr_topics=20)

# Visualize the reduced topic map
model.visualize_topics()


In [None]:
# Render BERTopic's topic hierarchy visualization for the fitted model.
model.visualize_hierarchy()

In [None]:
# Render BERTopic's bar chart visualization for the fitted model.
model.visualize_barchart()

In [None]:
# --- 8. Generate Gen Z Replies with OpenRouter LLM ---
import requests
import json
from google.colab import userdata
import os


In [None]:
## You need to add your OpenRouter API key as a Colab secret named 'openrouter', like we did in Assignment 7 ##

In [None]:
# Safely load API key
# The key is read from the Colab secret named 'openrouter' (so it never appears
# in the notebook) and exported as the environment variable that
# gen_z_llm_reply reads.
OPENROUTER_API_KEY = userdata.get('openrouter')
os.environ["OPENROUTER_API_KEY"] = OPENROUTER_API_KEY


In [None]:
import os
import json
import requests
import time
import pandas as pd

# --- Function to generate Gen Z replies using OpenRouter ---
def gen_z_llm_reply(user_comment, brand="Starbucks", timeout=30):
    """Ask an OpenRouter-hosted chat model for a short Gen Z-style reply to a tweet.

    Args:
        user_comment: Text of the tweet to reply to; it is embedded in the prompt.
        brand: Brand the model is told to speak for. The default, "Starbucks",
            reproduces the original prompt exactly.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The model's reply with surrounding whitespace stripped, or a string
        starting with "Error:" when the API key is missing, the API reports an
        error or rate limit, the response has an unexpected shape, or the
        request/parsing raises. Request and parsing failures are printed and
        reported through the return value instead of being raised.
    """
    api_key = os.environ.get("OPENROUTER_API_KEY")
    if not api_key:
        print("Error: OPENROUTER_API_KEY environment variable is not set.")
        return "Error: Missing API key"

    payload = {
        "model": "deepseek/deepseek-chat-v3-0324:free",
        "messages": [
            {
                "role": "user",
                "content": f"""You're a Gen Z social media manager for {brand} known for chaotic good energy and hyper-online humor.
                Reply to this tweet with a short, witty clapback or comeback using TikTok-style irony, internet slang, or niche cultural references.
                Avoid overused phrases like 'okay boomer' or cringe Gen Alpha slang. Be dry, savage, or playful — but stay under 1 sentence.

                Tweet: {user_comment}"""
            }
        ],
        "temperature": 0.8,
        "max_tokens": 100
    }

    try:
        response = requests.post(
            url="https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
                "HTTP-Referer": "https://colab.research.google.com",
                "X-Title": "UNLV"
            },
            json=payload,
            timeout=timeout,
        )

        response_data = response.json()

        # Handle API-reported errors, including rate limits. The error entry is
        # read defensively so a non-dict value is still reported as an API error.
        error = response_data.get("error") if isinstance(response_data, dict) else None
        if error:
            if isinstance(error, dict):
                error_msg = str(error.get("message", error))
            else:
                error_msg = str(error)
            if response.status_code == 429 or "Rate limit exceeded" in error_msg:
                print("⚠️ Rate limit hit. Try again after a short delay.")
                return "Error: Rate limit hit"
            print(f"⚠️ API Error: {error_msg}")
            return "Error: API error"

        # Parse valid response
        if response.status_code == 200 and "choices" in response_data:
            content = response_data["choices"][0]["message"]["content"]
            # A null/non-string content is reported as a missing reply below
            # rather than surfacing as an AttributeError on .strip().
            if isinstance(content, str):
                return content.strip()

        print(f"Error {response.status_code}: Unexpected structure.")
        print("Response JSON:", response_data)
        return "Error: No reply generated."

    except Exception as e:
        # Deliberately broad: one failed call must not abort a batch of replies.
        print("Exception occurred:", str(e))
        return "Error: API call failed"

# --- Apply to Sampled Tweets with Delay for Rate Limit ---
N_SAMPLE_TWEETS = 5          # how many tweets to send to the LLM
REQUEST_DELAY_SECONDS = 15   # pause between calls: stay under 4 requests per minute

# Cap the sample size at the number of available tweets so brands with fewer
# than N_SAMPLE_TWEETS matches do not make sample() raise.
sampled_tweets = selected_brand_tweets.sample(
    min(N_SAMPLE_TWEETS, len(selected_brand_tweets)), random_state=42
)

gen_z_replies = []
for position, tweet in enumerate(sampled_tweets):
    # Pause only BETWEEN requests; there is nothing to wait for after the last one.
    if position > 0:
        time.sleep(REQUEST_DELAY_SECONDS)
    gen_z_replies.append(gen_z_llm_reply(tweet))

# --- Create Output DataFrame ---
# One row per sampled tweet, pairing the raw tweet text with the generated reply.
reply_df = pd.DataFrame({
    "original_tweet": sampled_tweets.values,
    "gen_z_reply": gen_z_replies
})



In [1]:

# # Display result
# from IPython.display import display
# display(reply_df)

In [None]:
# Display neatly
from IPython.display import display, HTML

# Lift the column-width limit BEFORE rendering: to_html() honours
# display.max_colwidth, so setting it after the display call (as before) left
# long tweets/replies truncated on a fresh run.
pd.set_option('display.max_colwidth', None)

# Nice HTML table for clean Colab view
display(HTML(reply_df.to_html(index=False)))

original_tweet,gen_z_reply
"Starbucks will be launching free WiFi next week, not free coffee, sadly (via @drewb)","""Sorry, Drew, we can’t all be the main character—some of us just provide the ✨aesthetic✨ for your WFH era."""
I love the mountains!!! And cold weather. And mocha frappachinos from Starbucks,"""Mountains, cold weather, and a Frappuccino? Sounds like someone’s trying to summon their inner main character—winter edition. ☕❄️"""
"@gfalcone601 Nice &amp; sunny?? I'm so jealous, here it's cold &amp; the sky's grey eugh... Have a nice starbucks !! xx",✨ Maybe your coffee can bring the sunshine you're missing. ☕️🌞
"may or may not have come to school with a Starbucks... well, you love it","""Me showing up to class with a venti iced brown sugar oatmilk shaken espresso like it’s not my 4th personality trait this week 💅"""
"@katanthony Man. I had coffee bean coffee and it has completely destroyed starbucks for me. But that's okay, it's cheaper and tastier.","""Congrats on your coffee villain origin story, but we’ll be here when your wallet realizes grinding beans is a full-time job. ☕️💀"""


In [None]:
# Print every tweet/reply pair as a plain-text block, separated by '---'.
for original_text, reply_text in zip(reply_df['original_tweet'], reply_df['gen_z_reply']):
    print(
        "🟢 **Original Tweet:**\n"
        f"{original_text}\n\n"
        "💬 **Gen Z Reply:**\n"
        f"{reply_text}\n\n"
        "---\n"
    )


🟢 **Original Tweet:**
Starbucks will be launching free WiFi next week, not free coffee, sadly  (via @drewb)

💬 **Gen Z Reply:**
"Sorry, Drew, we can’t all be the main character—some of us just provide the ✨aesthetic✨ for your WFH era."

---

🟢 **Original Tweet:**
I love the mountains!!! And cold weather. And mocha frappachinos from Starbucks 

💬 **Gen Z Reply:**
"Mountains, cold weather, and a Frappuccino? Sounds like someone’s trying to summon their inner main character—winter edition. ☕❄️"

---

🟢 **Original Tweet:**
@gfalcone601 Nice &amp; sunny?? I'm so jealous, here it's cold &amp; the sky's grey eugh... Have a nice starbucks !!  xx

💬 **Gen Z Reply:**
✨ Maybe your coffee can bring the sunshine you're missing. ☕️🌞

---

🟢 **Original Tweet:**
may or may not have come to school with a Starbucks... well, you love it  

💬 **Gen Z Reply:**
"Me showing up to class with a venti iced brown sugar oatmilk shaken espresso like it’s not my 4th personality trait this week 💅"

---

🟢 **Original