## Basic Recommendation System on IMDB Movie Dataset

In [0]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

## Step 1: Read CSV File

In [0]:
movie_recommondation_file = pd.read_csv(r"C:\Users\YASHA\z.My_first_recommendation_system\1st_recomd_sys\movie_dataset.csv")
print(movie_recommondation_file.head())

   index     budget                                    genres  \
0      0  237000000  Action Adventure Fantasy Science Fiction   
1      1  300000000                  Adventure Fantasy Action   
2      2  245000000                    Action Adventure Crime   
3      3  250000000               Action Crime Drama Thriller   
4      4  260000000          Action Adventure Science Fiction   

                                       homepage      id  \
0                   http://www.avatarmovie.com/   19995   
1  http://disney.go.com/disneypictures/pirates/     285   
2   http://www.sonypictures.com/movies/spectre/  206647   
3            http://www.thedarkknightrises.com/   49026   
4          http://movies.disney.com/john-carter   49529   

                                            keywords original_language  \
0  culture clash future space war space colony so...                en   
1  ocean drug abuse exotic island east india trad...                en   
2         spy based on novel sec

In [0]:
print(movie_recommondation_file.columns)

Index(['index', 'budget', 'genres', 'homepage', 'id', 'keywords',
       'original_language', 'original_title', 'overview', 'popularity',
       'production_companies', 'production_countries', 'release_date',
       'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title',
       'vote_average', 'vote_count', 'cast', 'crew', 'director'],
      dtype='object')


In [0]:
print(movie_recommondation_file.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 24 columns):
index                   4803 non-null int64
budget                  4803 non-null int64
genres                  4775 non-null object
homepage                1712 non-null object
id                      4803 non-null int64
keywords                4391 non-null object
original_language       4803 non-null object
original_title          4803 non-null object
overview                4800 non-null object
popularity              4803 non-null float64
production_companies    4803 non-null object
production_countries    4803 non-null object
release_date            4802 non-null object
revenue                 4803 non-null int64
runtime                 4801 non-null float64
spoken_languages        4803 non-null object
status                  4803 non-null object
tagline                 3959 non-null object
title                   4803 non-null object
vote_average            4803 non-null fl

## Step 2: Data Manipulation

#### As the data has null values but for this case in order to generalize the and keep work on steady flow we will fill the value with the space.

In [0]:
movie_recommondation_file = movie_recommondation_file.fillna(" ")

## Step 3: Select Features

#### I am selecting important features which will I will use to help the user to recommend the end user.

In [0]:
important_features = ['keywords','genres','title','director']
print(important_features)

['keywords', 'genres', 'title', 'director']


## Step 4: Create a column in DF which combines all selected features

#### I want to culb the all feartures as one and defininig a function so that get any error is there

In [0]:
def club_features(row):
    try:
        return row['keywords'] + ' ' + row['genres'] + ' ' + row['title'] + ' ' + row['director']
    except:
        print('Something is wrong, Error in row :', row)

movie_recommondation_file["Clubed_features"] = movie_recommondation_file.apply(club_features, axis = 1)
print(movie_recommondation_file["Clubed_features"].head())

0    culture clash future space war space colony so...
1    ocean drug abuse exotic island east india trad...
2    spy based on novel secret agent sequel mi6 Act...
3    dc comics crime fighter terrorist secret ident...
4    based on novel mars medallion space travel pri...
Name: Clubed_features, dtype: object


## Step 5: Initialize the Count Vectorizer as CV from Sklearn.

In [0]:
Con_vec = CountVectorizer()

## Step 6: Create count matrix from this new combined column using fit_transform function from sklearn.

In [0]:
Count_of_movie_matrix = Con_vec.fit_transform(movie_recommondation_file['Clubed_features'])

## Step 7: Compute the Cosine Similarity based on the count_matrix make the simarlity matrix.

In [0]:
Cosine_similarity_matrix = cosine_similarity(Count_of_movie_matrix)

## Step 8: Take the input from the user which movie he likes.

In [0]:
movie_user_likes = "Avatar"

## Step 9: Get index of this movie from its title.

#### In order to get the title from the list. I am creating a fuction with that function, I am able to move inside the matrix.

In [0]:
def get_index_from_title(title):
    return movie_recommondation_file[movie_recommondation_file.title == title]["index"].values[0]

Movie_title_index = get_index_from_title(movie_user_likes)

## Step 10: Get a list of similar movies in descending order of similarity score.

In [0]:
Similar_movies = list(enumerate(Cosine_similarity_matrix[Movie_title_index]))

Sort_similar_movies = sorted(Similar_movies, key = lambda x :x[1],reverse = True)

## Step 11: Print titles of first 5 movies.

##### First define a function to get title form the index.                                                                                                                                   Then intialize the number of recommendation movies then create a loop to print all the outputs

In [0]:
def get_title_from_index(index):
    return movie_recommondation_file[movie_recommondation_file.index == index]["title"].values[0]

No_of_recommendation = 0
for movie in Sort_similar_movies:
    print(get_title_from_index(movie[0]))
    No_of_recommendation += 1
    if No_of_recommendation > 5:
        break

Avatar
Guardians of the Galaxy
Jason X
Zathura: A Space Adventure
Moonraker
Aliens


Thank you...Please provide your valuable feedback...