<a href="https://colab.research.google.com/github/yukti468/task/blob/main/MovieRecommenderipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import zipfile
import pandas as pd
import ast
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [12]:
with zipfile.ZipFile("/content/archive.zip", 'r') as zip_ref:
    zip_ref.extractall("/content/")

In [13]:
movies = pd.read_csv("/content/tmdb_5000_movies.csv")
credits = pd.read_csv("/content/tmdb_5000_credits.csv")

In [14]:
movies = movies.merge(credits, left_on='id', right_on='movie_id')

def get_names(text, key='name', top=3):
    try:
        return ' '.join([i[key].replace(" ", "") for i in ast.literal_eval(text)[:top]])
    except:
        return ''

In [15]:
movies['cast'] = movies['cast'].apply(lambda x: get_names(x, 'name', 3))
movies['crew'] = movies['crew'].apply(lambda x: next((i['name'] for i in ast.literal_eval(x) if i['job'] == 'Director'), ''))
movies['genres'] = movies['genres'].apply(lambda x: get_names(x))
movies['keywords'] = movies['keywords'].apply(lambda x: get_names(x))

In [16]:
# Combined features
movies['combined_features'] = (
    movies['genres'] + ' ' +
    movies['keywords'] + ' ' +
    movies['tagline'].fillna('') + ' ' +
    movies['cast'] + ' ' +
    movies['crew']
)

In [17]:
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['combined_features'])

In [18]:
cosine_sim = cosine_similarity(tfidf_matrix)
movie_titles = movies['title_x'].tolist()
movie_titles_lower = [title.lower() for title in movie_titles]

In [19]:

C = movies['vote_average'].mean()
m = movies['vote_count'].quantile(0.75)

In [20]:
def weighted_rating(x, m=m, C=C):
    v = x['vote_count']
    R = x['vote_average']
    return (v / (v + m)) * R + (m / (m + v)) * C

In [21]:
movies['weighted_score'] = movies.apply(weighted_rating, axis=1)

In [26]:
def recommend_movie(input_movie):
    input_movie_lower = input_movie.lower()
    close_match = difflib.get_close_matches(input_movie_lower, movie_titles_lower, n=1)

    if not close_match:
        print(f"❌ No match found for '{input_movie}'. Please try again.")
        return

    matched_index = movie_titles_lower.index(close_match[0])
    matched_title = movie_titles[matched_index]
    index = movies[movies.title_x == matched_title].index[0]

    similarity_scores = list(enumerate(cosine_sim[index]))
    sorted_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)[1:50]  # Top 50 to filter by score

    top_25 = sorted(
        sorted_scores,
        key=lambda x: movies.iloc[x[0]]['weighted_score'],
        reverse=True
    )[:25]

    print(f"\n🎬 Because you watched '{matched_title}', you might like:\n")
    for i, (idx, _) in enumerate(top_25, 1):
        title = movies.iloc[idx]['title_x']
        score = round(movies.iloc[idx]['weighted_score'], 2)
        print(f"{i}. {title} (Score: {score})")

In [27]:
movie_input = input("Enter a movie you like: ")
recommend_movie(movie_input)

Enter a movie you like: 500 days of summer

🎬 Because you watched '(500) Days of Summer', you might like:

1. Inception (Score: 8.0)
2. 10 Things I Hate About You (Score: 6.93)
3. The Equalizer (Score: 6.9)
4. If I Stay (Score: 6.88)
5. Treasure Planet (Score: 6.72)
6. 50/50 (Score: 6.67)
7. Stranger Than Fiction (Score: 6.59)
8. The Walk (Score: 6.58)
9. Looper (Score: 6.53)
10. High Fidelity (Score: 6.51)
11. The Amazing Spider-Man (Score: 6.46)
12. The Amazing Spider-Man 2 (Score: 6.44)
13. Bridesmaids (Score: 6.37)
14. The Young Victoria (Score: 6.37)
15. Beginners (Score: 6.32)
16. Hesher (Score: 6.23)
17. Knocked Up (Score: 6.16)
18. Practical Magic (Score: 6.16)
19. Premium Rush (Score: 6.15)
20. Road House (Score: 6.15)
21. Music and Lyrics (Score: 6.14)
22. The End of the Affair (Score: 6.13)
23. I Heart Huckabees (Score: 6.12)
24. Winter Passing (Score: 6.11)
25. Eulogy (Score: 6.11)
