In [1]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding

In [15]:
from google.colab import files

uploaded = files.upload()
import pandas as pd
import io

# Get the actual filename from the uploaded dictionary
filename = list(uploaded.keys())[0]

# Read the CSV using the correct filename
data = pd.read_csv(io.BytesIO(uploaded[filename]))
print(data)

Saving swiggy.csv.csv to swiggy.csv (6).csv
        ID               Area       City  Restaurant Price  Avg Rating  \
0        1             Suburb  Ahmedabad               600         4.2   
1        2  Business District       Pune               200         4.7   
2        3             Suburb  Bangalore               600         4.7   
3        4  Business District     Mumbai               900         4.0   
4        5          Tech Park     Mumbai               200         4.7   
...    ...                ...        ...               ...         ...   
7995  7996        City Center     Mumbai               300         4.0   
7996  7997           Downtown    Chennai               100         4.7   
7997  7998          Tech Park    Chennai               900         4.5   
7998  7999           Old Town      Delhi               500         4.2   
7999  8000           Downtown      Delhi               400         4.5   

      Total Rating        Food Item       Food Type Delivery Time  

In [16]:
data["Review"] = data["Review"].str.lower()
data["Review"] = data["Review"].replace(r'[^a-z0-9\s]', '', regex=True)

data['sentiment'] = data['Avg Rating'].apply(lambda x: 1 if x > 3.5 else 0)
data = data.dropna()


In [17]:
max_features = 5000
max_length = 200

tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(data["Review"])
X = pad_sequences(tokenizer.texts_to_sequences(data["Review"]), maxlen=max_length)
y = data['sentiment'].values

In [18]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42, stratify=y_train
)

In [19]:
model = Sequential([
    Embedding(input_dim=max_features, output_dim=16, input_length=max_length),
    SimpleRNN(64, activation='tanh', return_sequences=False),
    Dense(1, activation='sigmoid')
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)



In [20]:
history = model.fit(
    X_train, y_train,
    epochs=5,
    batch_size=32,
    validation_data=(X_val, y_val),
    verbose=1
)

score = model.evaluate(X_test, y_test, verbose=0)
print(f"Test accuracy: {score[1]:.2f}")

Epoch 1/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 42ms/step - accuracy: 0.7234 - loss: 0.6028 - val_accuracy: 0.7156 - val_loss: 0.5999
Epoch 2/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 48ms/step - accuracy: 0.7168 - loss: 0.5963 - val_accuracy: 0.7156 - val_loss: 0.5977
Epoch 3/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 40ms/step - accuracy: 0.7226 - loss: 0.5904 - val_accuracy: 0.7156 - val_loss: 0.5971
Epoch 4/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 40ms/step - accuracy: 0.7170 - loss: 0.5954 - val_accuracy: 0.7156 - val_loss: 0.5966
Epoch 5/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 40ms/step - accuracy: 0.7126 - loss: 0.6007 - val_accuracy: 0.7156 - val_loss: 0.5993
Test accuracy: 0.72


In [21]:
def predict_sentiment(review_text):
    text = review_text.lower()
    text = re.sub(r'[^a-z0-9\s]', '', text)

    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_length)

    prediction = model.predict(padded)[0][0]
    return f"{'Positive' if prediction >= 0.5 else 'Negative'} (Probability: {prediction:.2f})"

sample_review = "The food was great."
print(f"Review: {sample_review}")
print(f"Sentiment: {predict_sentiment(sample_review)}")

Review: The food was great.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 183ms/step
Sentiment: Positive (Probability: 0.73)
