In [None]:
# Install the bertopic library
# !pip install bertopic

# Necessary only when bertopic is not already installed in the runtime.

In [None]:
# Necessary Imports
import pandas as pd
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer
from umap.umap_ import UMAP
import umap
from hdbscan import HDBSCAN
import os
import json
import requests
import random
from google.colab import userdata

In [None]:
# Retrieve the OpenRouter API key from Colab's secret store (secret name: 'openrouter')
# so the key itself never appears in the notebook source.
OPENROUTER_API_KEY = userdata.get('openrouter')

# Fail fast with a clear message: os.environ only accepts str values, so an empty
# or missing value would otherwise surface as an unrelated error here, or as an
# authentication failure on the first API call.
if not OPENROUTER_API_KEY:
    raise ValueError(
        "Colab secret 'openrouter' is empty; add your OpenRouter API key in the "
        "Secrets panel and grant this notebook access."
    )

# summarize_comments reads the key back from the environment when it builds the
# Authorization header.
os.environ["OPENROUTER_API_KEY"] = OPENROUTER_API_KEY

In [None]:
# Load previously saved BERTopic models from Google Drive.
# NOTE(review): the two models live in different Drive folders (the positive model in a
# course folder, the negative model at the Drive root) — confirm this is intentional.
POSITIVE_MODEL_PATH = "/content/drive/MyDrive/1.a SCHOOL/1.A) Grad School/7. Spring 2025/2. MIS 781/Client/positive_model"
NEGATIVE_MODEL_PATH = "/content/drive/MyDrive/negative_model"

# No cell visible in this notebook mounts Drive, so check the paths up front and
# report exactly which one is missing instead of failing inside the model loader.
_missing_model_paths = [
    path for path in (POSITIVE_MODEL_PATH, NEGATIVE_MODEL_PATH) if not os.path.exists(path)
]
if _missing_model_paths:
    raise FileNotFoundError(
        "Saved BERTopic model(s) not found: "
        + ", ".join(_missing_model_paths)
        + ". Make sure Google Drive is mounted at /content/drive before running this cell."
    )

positive_model = BERTopic.load(POSITIVE_MODEL_PATH)
negative_model = BERTopic.load(NEGATIVE_MODEL_PATH)


In [None]:
# Keep the first 20 rows of each model's topic-info table (topic id, count, ...).
# In the outputs recorded further down, the first of these rows is topic -1, so it
# is counted among the 20.
positive_topics, negative_topics = (
    model.get_topic_info().head(20)
    for model in (positive_model, negative_model)
)


In [None]:
# Grand totals per sentiment group: sum the 'Count' column over every row of the
# topic-info table (all topics, not only the top 20). These totals are the
# denominators for the percentages printed with each summary.
positive_total, negative_total = (
    model.get_topic_info()['Count'].sum()
    for model in (positive_model, negative_model)
)

In [None]:
# Enhanced Function to generate summary using OpenRouter with expanded coverage
def summarize_comments(comments, max_tokens=300, max_prompt_chars=6000, timeout=60):
    """Summarize customer comments with a chat model served through OpenRouter.

    Args:
        comments: The comments to summarize. Normally a list of strings. A mapping
            of ``{group: [comments, ...]}`` is flattened into one list, and a single
            string is treated as one comment. Entries that are not strings, or that
            contain only whitespace, are ignored.
        max_tokens: Upper bound on the length of the generated summary, passed to
            the API unchanged. Defaults to 300.
        max_prompt_chars: Maximum prompt length in characters (instructions plus
            comments); longer prompts are cut off at this length. ``None`` disables
            the cut-off. Defaults to 6000.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The summary text on success. If there is nothing to summarize, the fixed
        message ``"No comments available to summarize."``. If the request fails or
        the API returns no usable summary, a string starting with
        ``"Error or empty response from API: "`` followed by the reason.

    Raises:
        KeyError: If the ``OPENROUTER_API_KEY`` environment variable is not set.
    """
    no_comments_message = "No comments available to summarize."
    if comments is None:
        return no_comments_message

    # Normalize the input to a flat list so random.sample always receives a sequence.
    if isinstance(comments, dict):
        candidates = [doc for docs in comments.values() for doc in (docs or [])]
    elif isinstance(comments, str):
        # A bare string would otherwise be sampled character by character.
        candidates = [comments]
    else:
        candidates = list(comments)

    # Drop blanks and non-text entries (e.g. None/NaN), which would break the join.
    usable_comments = [c for c in candidates if isinstance(c, str) and c.strip()]
    if not usable_comments:
        return no_comments_message

    # Use up to 500 comments, randomly selected if there are more.
    sample_size = min(500, len(usable_comments))
    sampled_comments = random.sample(usable_comments, sample_size)

    summary_prompt = (
        "Summarize the following customer reviews from casino guests succinctly, "
        "highlighting key insights such as what customers liked or disliked most about their experiences, "
        "specific aspects mentioned frequently (e.g., casino games, dining options, hotel stays, staff interactions, or parking), "
        "and overall sentiments or recommendations. Use as many reviews as possible for a comprehensive summary.\n\n"
    ) + "\n".join(sampled_comments)

    # Cap the prompt size. This is a character budget, used as a rough stand-in for
    # the model's token limit.
    summary_prompt = summary_prompt[:max_prompt_chars]

    payload = {
        "model": "google/gemini-2.0-pro-exp-02-05:free",
        "messages": [{"role": "user", "content": summary_prompt}],
        "max_tokens": max_tokens,
        "temperature": 0.5
    }

    try:
        response = requests.post(
            url="https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}",
                "Content-Type": "application/json",
                "HTTP-Referer": "https://colab.research.google.com",
                "X-Title": "UNLV"
            },
            data=json.dumps(payload),
            # Without a timeout a stalled connection would block the cell indefinitely.
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Report network failures the same way as API errors so that a loop over
        # many topics keeps going.
        return f"Error or empty response from API: {exc}"

    try:
        response_json = response.json()
    except ValueError:
        # The body was not JSON (for example an HTML error page).
        return f"Error or empty response from API: non-JSON response (HTTP {response.status_code})"

    error = None
    if isinstance(response_json, dict):
        choices = response_json.get("choices")
        if choices:
            content = (choices[0].get("message") or {}).get("content")
            if content:
                return content
        error = response_json.get("error")

    default_message = 'No summary available and no error message provided.'
    if isinstance(error, dict):
        error_message = error.get('message') or default_message
    elif error:
        # Tolerate an error reported as a plain string instead of an object.
        error_message = str(error)
    else:
        error_message = default_message
    return f"Error or empty response from API: {error_message}"


In [None]:
# Quick test: fetch and print the representative comments for one positive topic.
# NOTE(review): this reads row 10 of the top-20 table, not the first row — adjust the
# index if the first topic was intended.
# .iloc returns a NumPy integer. The output originally recorded for this cell is the
# whole {topic_id: [comments]} mapping rather than one topic's list, which suggests
# get_representative_docs did not treat that value as a topic selector; passing a
# plain Python int requests the single topic.
topic_id = int(positive_topics['Topic'].iloc[10])
topic_comments = positive_model.get_representative_docs(topic_id)
print(topic_comments)


{-1: [' Staff are very prompt and friend  Friendly staff  Friendly staff', 'I enjoyed playing the slots,. the machines were just very tight.  Theres hardly any good video poker machines in the place, initially when they were doing the remodeling we were told they were in the basement and they would bring them back but they havent, The food is excelent at all of the places that are open, but I feel you need a coffee shop, where you can sit down, have a more intimate feel, more variety of foods.because while im playing people always stop and ask me if you have a coffe shop and that th  My check in was weird.  When I got to the hotel my room wasnt ready so I told John at the front desk Ill go play a while and come back and get keys when the room was ready which I did.  I gathered my bags and when I went to my room I noticed their was not  see previous comments ', 'Staff are very friendly. Prompt and friendly staff.    '], 0: [' I miss the buffet!    ', ' Miss the buffet!    ', ' buffet   

In [None]:
# # Earlier, simpler version of summarize_comments, kept commented out for reference only (superseded by the active definition above)
# def summarize_comments(comments):
#     if not comments:
#         return "No representative comments available."
#     summary_prompt = "Summarize the following customer comments succinctly:\n\n" + "\n".join(comments[:20])
#     payload = {
#         "model": "google/gemini-2.0-pro-exp-02-05:free",
#         "messages": [{"role": "user", "content": summary_prompt}],
#         "max_tokens": 150,
#         "temperature": 0.5
#     }

#     response = requests.post(
#         url="https://openrouter.ai/api/v1/chat/completions",
#         headers={
#             "Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}",
#             "Content-Type": "application/json",
#             "HTTP-Referer": "https://colab.research.google.com",
#             "X-Title": "UNLV"
#         },
#         data=json.dumps(payload)
#     )

#     response_json = response.json()

#     if "choices" in response_json and len(response_json["choices"]) > 0:
#         return response_json["choices"][0]["message"]["content"]
#     else:
#         return "No summary reply found in the response."

Topic 0 is the most frequent topic identified by the BERTopic model: topics are numbered in descending order of size, and topic -1 collects the comments that were not assigned to any topic (outliers).
In the bar charts below, the values on the bottom axis (x-axis) are c-TF-IDF scores, which measure how important a term is within that topic relative to all other topics. These scores are not percentages of comments; they are relative weights that indicate how strongly each term is associated with the topic.

In [None]:
# Generate summaries for positive topics with counts
print("\nPositive Topic Summaries with Counts:")
for topic_id, count in zip(positive_topics['Topic'], positive_topics['Count']):
    # Read the two columns directly and cast explicitly, so the topic id is a
    # plain Python int regardless of how pandas boxes the values.
    topic_id, count = int(topic_id), int(count)
    # Share of all positive comments; guard against an empty model.
    percentage = (count / positive_total) * 100 if positive_total else 0.0
    topic_comments = positive_model.get_representative_docs(topic_id)
    summary = summarize_comments(topic_comments)
    print(f"\nTopic {topic_id} (Count: {count}, {percentage:.2f}%):")
    print(summary)


Positive Topic Summaries with Counts:

Topic -1 (Count: 49459, 37.89%):
Here's a summary of the casino guest reviews, highlighting key insights:

**Key Insights:**

*   **Positive:**
    *   **Staff Friendliness and Promptness:** Overwhelmingly positive comments about staff interactions. This is the most frequently mentioned and praised aspect.
    *   **Food Quality:** The food at the available restaurants is considered "excellent."

*   **Negative:**
    *   **Slot Machine Tightness:** Guests felt the slot machines were "very tight" (low payout).
    *   **Lack of Video Poker:** A significant complaint is the absence of good video poker machines, despite previous promises.
    *   **Missing Coffee Shop:** A strong desire for a more casual, intimate coffee shop with a wider variety of food options was expressed.
    *  **Check in issues:** Room was not ready.

**Frequently Mentioned Aspects:**

*   **Staff:** Friendliness and promptness are consistently highlighted.
*

Topic 0 (Count

In [None]:
# Generate summaries for negative topics with counts
print("\nNegative Topic Summaries with Counts:")
for topic_id, count in zip(negative_topics['Topic'], negative_topics['Count']):
    # Read the two columns directly and cast explicitly, so the topic id is a
    # plain Python int regardless of how pandas boxes the values.
    topic_id, count = int(topic_id), int(count)
    # Share of all negative comments; guard against an empty model.
    percentage = (count / negative_total) * 100 if negative_total else 0.0
    topic_comments = negative_model.get_representative_docs(topic_id)
    summary = summarize_comments(topic_comments)
    print(f"\nTopic {topic_id} (Count: {count}, {percentage:.2f}%):")
    print(summary)



Negative Topic Summaries with Counts:

Topic -1 (Count: 28073, 41.20%):
Here's a succinct summary of the customer reviews, highlighting key insights:

**Overall Sentiment:** Mixed. While some guests enjoyed aspects of their stay (specific restaurants, friendly check-in), a significant number of reviews point to serious service deficiencies and inconsistencies, impacting the overall experience.

**Key Likes:**

*   **Friendly Staff (sometimes):** Check-in/out staff were consistently praised for friendliness.
*   **Dining (Specific Restaurants):** Excellent food was mentioned at Asia and Bugatti's.

**Key Dislikes:**

*   **Beverage Service (Major Issue):** Consistently poor beverage service on the casino floor was a major complaint. This included long waits, forgotten orders, difficulty finding staff, and broken self-serve ice machines.
*   **Angry Butcher (Major Issue):** Repeatedly described as a very poor dining experience, with issues in service, food quality, and management.
*   *

In [None]:
# Bar charts of the top-scoring terms for up to 20 positive topics.
# The bars show c-TF-IDF term scores (term importance within a topic), not the
# number or share of comments per topic.
positive_model.visualize_barchart(top_n_topics=20)



In [None]:
# Bar charts of the top-scoring terms for up to 20 negative topics.
# The bars show c-TF-IDF term scores (term importance within a topic), not the
# number or share of comments per topic.
negative_model.visualize_barchart(top_n_topics=20)


In [None]:
# Hierarchy view of the positive topics.
# custom_labels=True requests custom topic labels; no cell visible here sets any, so
# they are presumably stored in the saved model — TODO confirm.
positive_model.visualize_hierarchy(custom_labels=True)


In [None]:
# Hierarchy view of the negative topics.
# custom_labels=True requests custom topic labels; no cell visible here sets any, so
# they are presumably stored in the saved model — TODO confirm.
negative_model.visualize_hierarchy(custom_labels=True)


In [None]:
# Plot the positive model's topics with BERTopic's visualize_topics
# (an inter-topic distance map, per the BERTopic documentation).
positive_model.visualize_topics()

In [None]:
# Plot the negative model's topics with BERTopic's visualize_topics
# (an inter-topic distance map, per the BERTopic documentation).
negative_model.visualize_topics()

In [None]:
# Term-score bar charts again, limited to 10 positive topics and titled with custom
# labels (assumed to be stored in the saved model — TODO confirm).
# NOTE(review): the cells visible here have no matching chart for the negative model.
positive_model.visualize_barchart(top_n_topics=10, custom_labels=True)
