In [14]:
!pip install streamlit pyngrok vaderSentiment scikit-learn pandas


Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 126.0/126.0 kB 3.5 MB/s eta 0:00:00
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [15]:
# Colab-only helper module: this cell will not run outside Google Colab.
from google.colab import files
# Opens Colab's upload widget and blocks until a file is chosen. The cell
# output ("Saving ... to ...") shows the file being stored under its own name,
# which is the relative path the later pd.read_csv call reads from.
uploaded = files.upload()  # Click on the upload button to upload the file


Saving Yelp Restaurant Reviews.csv to Yelp Restaurant Reviews.csv


In [16]:
import pandas as pd

# 3a. Load and inspect
df = pd.read_csv("Yelp Restaurant Reviews.csv")
print("Columns:", df.columns.tolist())
# Only the *last* expression of a cell is rendered automatically; a bare
# `df.head()` in the middle of a cell is silently discarded, so the preview
# has to be passed to display() explicitly.
display(df.head())

# 3b. Create a `name` column by parsing the Yelp URL slug:
#     e.g. "https://www.yelp.com/biz/spice-garden-delhi" → "Spice Garden Delhi"
def extract_name(url):
    """Derive a human-readable restaurant name from a Yelp business URL.

    The slug after ``/biz/`` is taken, any ``?query`` suffix is dropped,
    hyphens become spaces and the result is title-cased, e.g.
    ``".../biz/spice-garden-delhi?osq=x"`` -> ``"Spice Garden Delhi"``.

    Args:
        url: The Yelp URL. May be a non-string (e.g. NaN from a missing CSV
            cell), in which case the placeholder is returned.

    Returns:
        The title-cased name, or ``"Unknown"`` when ``url`` is not a string,
        does not contain ``/biz/``, or has an empty slug.
    """
    # Explicit guards instead of a bare `except:` so that only the expected
    # "not a usable URL" cases are mapped to the placeholder; anything else
    # (including KeyboardInterrupt) is no longer swallowed.
    if not isinstance(url, str) or "/biz/" not in url:
        return "Unknown"

    slug = url.split("/biz/")[-1].split("?")[0]
    if not slug:
        return "Unknown"
    return slug.replace("-", " ").title()

# Derive the restaurant name, normalise the review column's name, then drop
# rows that lack either value -- expressed as a single method chain.
df = (
    df.assign(name=df["Yelp URL"].apply(extract_name))
    .rename(columns={"Review Text": "review_text"})
    .dropna(subset=["name", "review_text"])
)
print(f"After cleaning: {df.shape[0]} rows")


Columns: ['Yelp URL', 'Rating', 'Date', 'Review Text']
After cleaning: 19896 rows


In [17]:
%%bash
# Write the Streamlit app to streamlit_app.py in the current directory.
# The heredoc delimiter is quoted ('EOF'), so the shell performs no variable
# or command expansion on the Python source below -- it is written verbatim.
cat > streamlit_app.py << 'EOF'
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# ——— Load & prepare data ———
# Streamlit re-executes this whole script on every widget interaction, so the
# expensive steps (CSV parsing, TF-IDF fitting) are wrapped in cached functions
# and only run once per server process.
DATA_PATH = "Yelp Restaurant Reviews.csv"


def _name_from_url(url):
    """Turn a Yelp business URL into a display name ("Unknown" if unparseable)."""
    # Guard against missing/non-string cells, which would otherwise raise
    # AttributeError on `.split` and crash the app at start-up.
    if not isinstance(url, str) or "/biz/" not in url:
        return "Unknown"
    slug = url.split("/biz/")[-1].split("?")[0]
    return slug.replace("-", " ").title() or "Unknown"


@st.cache_data
def load_reviews(path):
    """Read the review CSV and return the cleaned frame used by the app.

    Adds a `name` column parsed from "Yelp URL", renames "Review Text" to
    `review_text`, drops rows missing either, and exposes the review text as
    `tags_combined` (the corpus the vectorizer is fitted on).
    Cached per `path`; restart the app or clear the cache if the file changes.
    """
    reviews = pd.read_csv(path)
    reviews["name"] = reviews["Yelp URL"].apply(_name_from_url)
    reviews = reviews.rename(columns={"Review Text": "review_text"}).dropna(subset=["name", "review_text"])
    return reviews.assign(tags_combined=reviews["review_text"])


@st.cache_resource
def build_tfidf_index(path):
    """Fit a TF-IDF vectorizer on the reviews and return (vectorizer, matrix).

    Matrix rows are in the same positional order as `load_reviews(path)`.
    """
    corpus = load_reviews(path)["tags_combined"]
    model = TfidfVectorizer(stop_words='english')
    return model, model.fit_transform(corpus)


df = load_reviews(DATA_PATH)

# ——— Vectorizer & Sentiment model ———
vectorizer, tfidf_matrix = build_tfidf_index(DATA_PATH)
analyzer = SentimentIntensityAnalyzer()

# ——— Recommendation logic ———
def generate_recommendations(keywords, mood_input):
    """Rank restaurants by keyword similarity, nudged by the user's mood.

    Args:
        keywords: Iterable of keyword strings; they are joined with spaces and
            projected into the fitted TF-IDF space.
        mood_input: Free-text mood description, scored with VADER.

    Returns:
        (top5, sentiment) where `top5` is a DataFrame of at most five rows --
        the best-scoring review of each of the top restaurants -- carrying
        `match_score` and `sentiment_adjusted_score` columns, and `sentiment`
        is VADER's compound score for `mood_input`.
    """
    sentiment = analyzer.polarity_scores(mood_input)['compound']
    query_vec = vectorizer.transform([" ".join(keywords)])
    match_score = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # Score a copy: writing the columns into the module-level `df` would
    # mutate shared state on every call.
    scored = df.assign(
        match_score=match_score,
        sentiment_adjusted_score=match_score.copy(),
    )

    # Mood nudge: a clearly negative mood (< -0.2) favours calm places, a
    # clearly positive one (> 0.2) favours lively places; neutral moods leave
    # the similarity ranking untouched.
    if sentiment < -0.2:
        mood_terms = "calm|cozy|quiet"
    elif sentiment > 0.2:
        mood_terms = "fun|spicy|lively"
    else:
        mood_terms = None

    if mood_terms is not None:
        # na=False keeps the mask strictly boolean even if a review is missing.
        boosted = scored["review_text"].str.contains(mood_terms, case=False, na=False)
        scored.loc[boosted, "sentiment_adjusted_score"] += 0.1

    # Each row is one *review*, so a plain top-5 can list the same restaurant
    # several times. Keep only the best-scoring review per restaurant before
    # taking the top five. The stable sort preserves file order among ties.
    ranked = scored.sort_values("sentiment_adjusted_score", ascending=False, kind="stable")
    top5 = ranked.drop_duplicates(subset="name").head(5)
    return top5, sentiment

# ——— Streamlit UI ———
st.title("🍽️ Smart Restaurant Recommender")

kw = st.text_input("Keywords (comma separated):", "spicy, cozy")
mood = st.text_area("Describe your current mood:", "Feeling great and energetic!")

if st.button("Get Recommendations"):
    # Ignore blank entries such as those produced by "" or "spicy,,cozy".
    klist = [k.strip() for k in kw.split(",") if k.strip()]
    if not klist:
        # With no keywords every match score is 0 and the "top 5" would be
        # arbitrary, so ask for input instead of showing meaningless results.
        st.warning("Please enter at least one keyword.")
    else:
        recs, score = generate_recommendations(klist, mood)
        st.write(f"**Mood sentiment score**: {score:.2f}")
        st.write("### Top 5 Recommendations:")
        for _, r in recs.iterrows():
            st.markdown(f"**{r['name']}**  •  Match {r['match_score']:.2f} → Adj {r['sentiment_adjusted_score']:.2f}")
            # Collapse whitespace so embedded blank lines cannot break out of
            # the markdown blockquote, and only add the ellipsis when the
            # review was actually truncated.
            text = " ".join(str(r['review_text']).split())
            snippet = text[:150] + ("…" if len(text) > 150 else "")
            st.markdown(f"> {snippet}")
            st.markdown("---")
EOF


In [18]:
import os
from getpass import getpass

from pyngrok import ngrok

# Streamlit's default port, passed explicitly below so that the server and the
# tunnel are guaranteed to agree.
STREAMLIT_PORT = 8501

# 1️⃣ Provide your ngrok authtoken via the NGROK_AUTHTOKEN environment variable,
#     or type it at the hidden prompt. Never paste the token into the notebook:
#     anyone who can read the file can use it, and a token that has ever been
#     saved or shared in a notebook should be revoked in the ngrok dashboard.
authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(authtoken)

# 2️⃣ Start the Streamlit app in background
get_ipython().system_raw(f"streamlit run streamlit_app.py --server.port {STREAMLIT_PORT} &")

# 3️⃣ Open the ngrok tunnel
public_url = ngrok.connect(STREAMLIT_PORT)
print("✨ Your app is live at:", public_url)


✨ Your app is live at: NgrokTunnel: "https://15f5-34-125-1-213.ngrok-free.app" -> "http://localhost:8501"
