In [7]:
import os
import pandas as pd

def load_reviews(directory, label):
    texts = []
    for fname in os.listdir(directory):
        with open(os.path.join(directory, fname), encoding='utf-8') as f:
            texts.append(f.read())
    return pd.DataFrame({'review': texts, 'sentiment': label})

pos_reviews = load_reviews('aclImdb/train/pos', 1).head(1000)
neg_reviews = load_reviews('aclImdb/train/neg', 0).head(1000)

df = pd.concat([pos_reviews, neg_reviews]).reset_index(drop=True)


In [8]:
import re

def clean_text(text):
    text = re.sub(r'<.*?>', '', text)  # remove HTML
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # keep only letters
    return text.lower()

df['cleaned'] = df['review'].apply(clean_text)


In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['cleaned'])

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, df['sentiment'], test_size=0.2, random_state=42)

clf = LogisticRegression()
clf.fit(X_train, y_train)

print("Accuracy:", clf.score(X_test, y_test))


Accuracy: 0.885


In [11]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_movies(user_review, top_n=5):
    cleaned = clean_text(user_review)
    vec = vectorizer.transform([cleaned])
    
    sentiment = clf.predict(vec)[0]
    proba = clf.predict_proba(vec)[0]
    
    print("Predicted Sentiment:", "Positive" if sentiment == 1 else "Negative")
    if sentiment == 1:
      print(f"Confidence: {proba[1]:.2f}")
    else:
      print(f"Confidence: {proba[0]:.2f}")
      return "Sorry, no recommendations based on negative sentiment."

    pos_indices = df[df['sentiment'] == 1].index
    pos_vectors = X[pos_indices]

    sims = cosine_similarity(vec, pos_vectors).flatten()
    top_indices = pos_indices[sims.argsort()[-top_n:][::-1]]

    return df.loc[top_indices, 'review']


In [12]:
user_input = "It was just bad, illogical climax, poor acting, worst movie ever."
recommendations = recommend_movies(user_input)
if isinstance(recommendations, str):
    print(recommendations) 
else:
    for i, rec in enumerate(recommendations, 1):
        print(f"{i}. {rec[:300]}...\n")

Predicted Sentiment: Negative
Confidence: 0.94
Sorry, no recommendations based on negative sentiment.


In [13]:
user_input = "Best Movie ever seen, great casting, acting and music was too good."
recommendations = recommend_movies(user_input)
if isinstance(recommendations, str):
    print(recommendations) 
else:
    for i, rec in enumerate(recommendations, 1):
        print(f"{i}. {rec[:300]}...\n")

Predicted Sentiment: Positive
Confidence: 0.71
1. This entire movie is worth watching just for the magnificent final moment - its the best ending of any movie I've ever seen. Perfect, beautiful, funny, simply wonderful.<br /><br />I found this movie delightful, even with it's French taking-itself-too-seriously deep meanings thing going on. I loved ...

2. good movie, good music, good background and an acceptable plot. but the main point again as his movies tend to be, the man is the best actor in idia and can turn dust into gold. nana patekar. this may be his second best performance after parinda( others may disagree). although other movies are not f...

3. When I saw this movie I was stunned by what a great movie it was. This is the only movie I think I would ever give a 10 star rating. I am sure this movie will always be in my top 5.<br /><br />The acting is superb. Leonardo DiCaprio and Kate Winslett are at their best. I don't think anyone could hav...

4. all the acting done in the