**Task 1: Reading Data**

In [5]:
from collections import defaultdict

In [66]:
#input: ratings file
#output: dictionary; key = movie; value = corresponding list of ratings
def read_ratings_data(f):
    """Load a ratings file into a movie -> list-of-ratings mapping.

    Every non-blank line of the file is expected to contain three
    ``|``-separated fields in the order ``movie|rating|user_id``.

    Args:
        f: Path of the ratings file to read.

    Returns:
        dict: Whitespace-stripped movie title mapped to the list of its
        ratings (as floats), in the order they appear in the file.

    Raises:
        ValueError: If a non-blank line does not split into exactly three
            fields, or if its rating field is not a valid float.
    """
    ratings_by_movie = defaultdict(list)
    # The context manager closes the file even when a malformed line raises.
    with open(f) as ratings_file:
        for line in ratings_file:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no record and would otherwise break the 3-way unpack.
            if not line.strip():
                continue
            movie, rating, _user_id = line.split('|')
            ratings_by_movie[movie.strip()].append(float(rating.strip()))
    # Hand back a plain dict so missing keys raise instead of being created.
    return dict(ratings_by_movie)

In [67]:
# Build the movie -> ratings mapping from the sample ratings file and show it.
movie_to_ratings = read_ratings_data('movieRatingSample.txt')
print(movie_to_ratings)

{'Toy Story (1995)': [4.0, 4.0, 4.5, 2.5, 4.5, 3.5], 'Jumanji (1995)': [4.0, 4.0, 3.0, 3.0, 3.0, 3.5], 'Tom and Huck (1995)': [3.0, 1.0, 5.0, 3.0, 2.0, 3.0], 'Grumpier Old Men (1995)': [4.0, 5.0, 3.0, 3.0, 4.0, 5.0], 'Waiting to Exhale (1995)': [3.0, 3.0, 3.0, 3.0, 1.0, 2.0], 'Father of the Bride Part II (1995)': [5.0, 3.0, 5.0, 3.0, 4.0, 4.0], 'Heat (1995)': [4.0, 4.0, 5.0, 4.0, 4.0, 4.5], 'Sudden Death (1995)': [4.0, 3.0, 3.0, 2.0, 3.0, 5.0], 'GoldenEye (1995)': [3.0, 2.0, 3.0, 2.0, 5.0, 3.0]}


In [8]:
#input: movies file
#output: dictionary; key = movie; value = genre
def read_movie_genre(f):
    """Load a movies file into a movie -> genre mapping.

    Every non-blank line of the file is expected to contain three
    ``|``-separated fields in the order ``genre|movie_id|movie``.

    Args:
        f: Path of the movies file to read.

    Returns:
        dict: Whitespace-stripped movie title mapped to its
        whitespace-stripped genre. If a title occurs on several lines, the
        last occurrence wins.

    Raises:
        ValueError: If a non-blank line does not split into exactly three
            fields.
    """
    genre_by_movie = {}
    # The context manager closes the file even when a malformed line raises.
    with open(f) as movies_file:
        for line in movies_file:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no record and would otherwise break the 3-way unpack.
            if not line.strip():
                continue
            genre, _movie_id, movie = line.split('|')
            genre_by_movie[movie.strip()] = genre.strip()
    return genre_by_movie
    

In [11]:
# Build the movie -> genre mapping from the sample movies file and show it.
movie_to_genre = read_movie_genre('genreMovieSample.txt')
print(movie_to_genre)

{'Toy Story (1995)': 'Adventure', 'Jumanji (1995)': 'Adventure', 'Tom and Huck (1995)': 'Adventure', 'Grumpier Old Men (1995)': 'Comedy', 'Waiting to Exhale (1995)': 'Comedy', 'Father of the Bride Part II (1995)': 'Comedy', 'Heat (1995)': 'Action', 'Sudden Death (1995)': 'Action', 'GoldenEye (1995)': 'Action'}


**Task 2: Processing Data**

In [68]:
#input: movie to genre dictionary
#output: dictionary; key = genre; value = list of movies in this genre
def create_genre_dict(movie_to_genre):
    """Invert a movie -> genre mapping into genre -> list of movies.

    Args:
        movie_to_genre: Mapping of movie title to its genre.

    Returns:
        dict: Each genre mapped to the list of its movies. Genres and the
        movies inside each list keep the iteration order of the input.
    """
    movies_by_genre = {}
    for movie, genre in movie_to_genre.items():
        # First sighting of a genre creates its list; later ones extend it.
        movies_by_genre.setdefault(genre, []).append(movie)
    return movies_by_genre

In [69]:
# Invert movie_to_genre into a genre -> list-of-movies mapping and show it.
genre_to_movies = create_genre_dict(movie_to_genre)
print(genre_to_movies)

{'Adventure': ['Toy Story (1995)', 'Jumanji (1995)', 'Tom and Huck (1995)'], 'Comedy': ['Grumpier Old Men (1995)', 'Waiting to Exhale (1995)', 'Father of the Bride Part II (1995)'], 'Action': ['Heat (1995)', 'Sudden Death (1995)', 'GoldenEye (1995)']}


In [70]:
#input: movie to ratings dictionary
#output: dictionary; key = movie; value = average rating

def calculate_average_rating(movie_to_ratings):
    """Compute each movie's mean rating, rounded to four decimal places.

    Args:
        movie_to_ratings: Mapping of movie title to a non-empty list of
            numeric ratings.

    Returns:
        dict: Movie title mapped to its average rating as a float. The
        rounding is done by formatting with ``.4f`` and parsing the text back.

    Raises:
        ZeroDivisionError: If any movie has an empty rating list.
    """
    return {
        movie: float(format(sum(ratings) / len(ratings), '.4f'))
        for movie, ratings in movie_to_ratings.items()
    }


In [71]:
# Reduce each movie's rating list to its average and show the result.
movie_to_avgRating = calculate_average_rating(movie_to_ratings)
print(movie_to_avgRating)

{'Toy Story (1995)': 3.8333, 'Jumanji (1995)': 3.4167, 'Tom and Huck (1995)': 2.8333, 'Grumpier Old Men (1995)': 4.0, 'Waiting to Exhale (1995)': 2.5, 'Father of the Bride Part II (1995)': 4.0, 'Heat (1995)': 4.25, 'Sudden Death (1995)': 3.3333, 'GoldenEye (1995)': 3.0}


**Task 3: Recommendation**

In [18]:
#1
#input: movie to average ratings dictionary, n
#output: dictionary of same structure but only with top n movies rating wise
def get_popular_movies(movie_to_avg_rating, n=10):
    """Return the n best-rated movies.

    Args:
        movie_to_avg_rating: Mapping of movie title to average rating.
        n: Number of movies to keep; when fewer movies exist, all of them
            are returned.

    Returns:
        dict: Movie -> average rating, ordered from highest to lowest
        rating. Movies with equal ratings keep their input order.
    """
    ranked = list(movie_to_avg_rating.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    # Never ask for more entries than are available.
    limit = min(n, len(ranked))
    return dict(ranked[:limit])
    

In [72]:
# Show the three movies with the highest average rating.
print(get_popular_movies(movie_to_avgRating, 3))

{'Heat (1995)': 4.25, 'Grumpier Old Men (1995)': 4.0, 'Father of the Bride Part II (1995)': 4.0}


In [20]:
#2
#input: movie to average ratings dictionary, t
#output: dictionary of the same form but only including those items with value >= t
def filter_movies(movie_to_avg_rating, t=3):
    """Keep only the movies whose average rating reaches a threshold.

    Args:
        movie_to_avg_rating: Mapping of movie title to average rating.
        t: Inclusive lower bound on the average rating.

    Returns:
        dict: The entries of the input whose rating is ``>= t``, in input
        order.
    """
    return {
        movie: avg
        for movie, avg in movie_to_avg_rating.items()
        if avg >= t
    }
        

In [73]:
# Show every movie whose average rating is at least 2.
print(filter_movies(movie_to_avgRating, 2))

{'Toy Story (1995)': 3.8333, 'Jumanji (1995)': 3.4167, 'Tom and Huck (1995)': 2.8333, 'Grumpier Old Men (1995)': 4.0, 'Waiting to Exhale (1995)': 2.5, 'Father of the Bride Part II (1995)': 4.0, 'Heat (1995)': 4.25, 'Sudden Death (1995)': 3.3333, 'GoldenEye (1995)': 3.0}


In [22]:
#3
#input: genre, a genre-to-movies dictionary, a dictionary of movie:average rating, n 
#output: n most popular movies in that genre based on average ratings
def get_popular_in_genre(genre, genre_to_movies, movie_to_avg, n=5):
    """Return the n best-rated movies of one genre.

    Args:
        genre: Genre to look up; must be a key of ``genre_to_movies``.
        genre_to_movies: Mapping of genre to the list of its movies.
        movie_to_avg: Mapping of movie title to average rating.
        n: Number of movies to keep; when the genre has fewer rated movies,
            all of them are returned.

    Returns:
        dict: Movie -> average rating for the selected movies, ordered from
        highest to lowest rating. Ties keep the order of ``movie_to_avg``.

    Raises:
        KeyError: If ``genre`` is not present in ``genre_to_movies``.
    """
    movies_in_genre = genre_to_movies[genre]

    # Only movies that have an average rating can be ranked.
    ranked = sorted(
        (pair for pair in movie_to_avg.items() if pair[0] in movies_in_genre),
        key=lambda pair: pair[1],
        reverse=True,
    )

    limit = min(n, len(ranked))
    return dict(ranked[:limit])
    

In [78]:
# Show the best-rated Adventure movies (default n=5).
print(get_popular_in_genre("Adventure", genre_to_movies, movie_to_avgRating))

{'Toy Story (1995)': 3.8333, 'Jumanji (1995)': 3.4167, 'Tom and Huck (1995)': 2.8333}


In [79]:
#4
#input: genre, a genre-to-movies dictionary, a dictionary of movie:average rating
#output: average rating of movies in the given genre
def get_genre_rating(genre, genre_to_movies, movie_to_avg):
    """Average the average ratings of the movies in one genre.

    Only movies that actually have an entry in ``movie_to_avg`` take part in
    the average, so a genre movie without ratings (or listed twice) does not
    drag the result down.

    Args:
        genre: Genre to look up; must be a key of ``genre_to_movies``.
        genre_to_movies: Mapping of genre to the list of its movies.
        movie_to_avg: Mapping of movie title to average rating.

    Returns:
        float: Mean of the rated movies' averages, rounded to four decimal
        places, or ``0.0`` when the genre has no rated movie.

    Raises:
        KeyError: If ``genre`` is not present in ``genre_to_movies``.
    """
    # A set makes the membership test below O(1) per movie.
    movies_in_genre = set(genre_to_movies[genre])

    genre_averages = [
        avg for movie, avg in movie_to_avg.items() if movie in movies_in_genre
    ]

    # Nothing to average: avoid dividing by zero.
    if not genre_averages:
        return 0.0

    mean = sum(genre_averages) / len(genre_averages)
    return float(f'{mean:.4f}')
        

In [80]:
# Average rating across the Adventure movies.
print(get_genre_rating("Adventure", genre_to_movies, movie_to_avgRating))

3.3611


In [81]:
# Average rating across the Action movies.
print(get_genre_rating("Action", genre_to_movies, movie_to_avgRating))

3.5278


In [82]:
# Average rating across the Comedy movies.
print(get_genre_rating("Comedy", genre_to_movies, movie_to_avgRating))

3.5


In [83]:
#5
#input: genre_to_movies dictionary, movie_to_avgRating dictionary, n
#output: top n rated genres as a dictionary of genre:average rating

def genre_popularity(genre_to_movies, movie_to_avgRating, n=5):
    """Return the n best-rated genres.

    Args:
        genre_to_movies: Mapping of genre to the list of its movies.
        movie_to_avgRating: Mapping of movie title to average rating.
        n: Number of genres to keep; when fewer genres exist, all of them
            are returned.

    Returns:
        dict: Genre -> average rating (as computed by ``get_genre_rating``),
        ordered from highest to lowest rating. Ties keep the order of
        ``genre_to_movies``.
    """
    ranked = sorted(
        (
            (genre, get_genre_rating(genre, genre_to_movies, movie_to_avgRating))
            for genre in genre_to_movies
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )

    limit = min(n, len(ranked))
    return dict(ranked[:limit])
        

In [84]:
# Rank the genres by average rating using the default n=5.
print(genre_popularity(genre_to_movies, movie_to_avgRating))

{'Action': 3.5278, 'Comedy': 3.5, 'Adventure': 3.3611}


In [85]:
# Show only the single best-rated genre.
print(genre_popularity(genre_to_movies, movie_to_avgRating, 1))

{'Action': 3.5278}


In [86]:
# Show the two best-rated genres.
print(genre_popularity(genre_to_movies, movie_to_avgRating, 2))

{'Action': 3.5278, 'Comedy': 3.5}


**Task 4 (User Focused)**

In [87]:
#1
def read_user_ratings(ratings):
    """Load a ratings file into a user -> list of (movie, rating) mapping.

    Every non-blank line of the file is expected to contain three
    ``|``-separated fields in the order ``movie|rating|user_id``.

    Args:
        ratings: Path of the ratings file to read.

    Returns:
        dict: Whitespace-stripped user id (a string) mapped to the list of
        ``(movie, rating)`` tuples for that user, in file order. Ratings are
        floats.

    Raises:
        ValueError: If a non-blank line does not split into exactly three
            fields, or if its rating field is not a valid float.
    """
    ratings_by_user = defaultdict(list)
    # The context manager closes the file even when a malformed line raises.
    with open(ratings) as ratings_file:
        for line in ratings_file:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no record and would otherwise break the 3-way unpack.
            if not line.strip():
                continue
            movie, rating, uid = line.split('|')
            ratings_by_user[uid.strip()].append(
                (movie.strip(), float(rating.strip()))
            )
    # Hand back a plain dict so unknown users raise instead of being created.
    return dict(ratings_by_user)

In [90]:
# Build the user -> [(movie, rating), ...] mapping from the sample ratings
# file and show it.
user_to_rating = read_user_ratings('movieRatingSample.txt')
print (user_to_rating)

{'1': [('Toy Story (1995)', 4.0), ('Grumpier Old Men (1995)', 4.0), ('Heat (1995)', 4.0)], '5': [('Toy Story (1995)', 4.0)], '7': [('Toy Story (1995)', 4.5)], '15': [('Toy Story (1995)', 2.5)], '17': [('Toy Story (1995)', 4.5)], '18': [('Toy Story (1995)', 3.5), ('Jumanji (1995)', 3.0), ('Heat (1995)', 4.0)], '6': [('Jumanji (1995)', 4.0), ('Tom and Huck (1995)', 3.0), ('Grumpier Old Men (1995)', 5.0), ('Waiting to Exhale (1995)', 3.0), ('Father of the Bride Part II (1995)', 5.0), ('Heat (1995)', 4.0), ('GoldenEye (1995)', 3.0)], '8': [('Jumanji (1995)', 4.0), ('GoldenEye (1995)', 2.0)], '19': [('Jumanji (1995)', 3.0), ('Grumpier Old Men (1995)', 3.0), ('GoldenEye (1995)', 2.0)], '20': [('Jumanji (1995)', 3.0), ('Tom and Huck (1995)', 1.0)], '21': [('Jumanji (1995)', 3.5), ('GoldenEye (1995)', 5.0)], '43': [('Tom and Huck (1995)', 5.0), ('Grumpier Old Men (1995)', 5.0), ('Father of the Bride Part II (1995)', 5.0)], '274': [('Tom and Huck (1995)', 3.0)], '372': [('Tom and Huck (1995)', 

In [109]:
#2
#input: user id, the user-to-movies dictionary, and the movie-to-genre dictionary 
#output: top genre the user likes
def get_user_genre(uid, user_to_movies, movie_to_genre):
    """Find the genre a user rates highest on average.

    Args:
        uid: User id to look up.
        user_to_movies: Mapping of user id to a list of ``(movie, rating)``
            tuples.
        movie_to_genre: Mapping of movie title to genre.

    Returns:
        The genre with the highest mean rating among the movies this user
        rated. When several genres tie, the one the user rated first wins.
        Returns ``None`` (after printing an error) when the user is unknown,
        and ``None`` when none of the user's rated movies has a known genre.
    """
    if uid not in user_to_movies:
        print('Error: User does not exist')
        return None

    # genre -> [sum of this user's ratings, number of movies rated]
    genre_totals = {}
    for movie, rating in user_to_movies[uid]:
        # A rated movie that is missing from the genre mapping cannot be
        # attributed to any genre, so it is left out instead of raising.
        if movie not in movie_to_genre:
            continue
        totals = genre_totals.setdefault(movie_to_genre[movie], [0, 0])
        totals[0] += rating
        totals[1] += 1

    # No usable rating at all: there is no genre to report.
    if not genre_totals:
        return None

    # max() returns the first maximal key, i.e. the earliest-rated genre on
    # ties, matching a stable descending sort.
    return max(
        genre_totals,
        key=lambda genre: genre_totals[genre][0] / genre_totals[genre][1],
    )
        
    
    

In [110]:
# User '1' rated one movie in each genre at 4.0, so this shows how a tie is resolved.
print(get_user_genre('1', user_to_rating, movie_to_genre ))

Adventure


In [111]:
# Favourite genre of user '18'.
print(get_user_genre('18', user_to_rating, movie_to_genre ))

Action


In [112]:
# Favourite genre of user '6'.
print(get_user_genre('6', user_to_rating, movie_to_genre ))

Comedy


In [113]:
# User '0' is not in the sample data: the function prints an error and returns None.
print(get_user_genre('0', user_to_rating, movie_to_genre ))

Error: User does not exist
None


In [108]:
#3
#input: user id, user to movies dictionary, movie to genre dictionary, movie to avgRating dictionary
#output: movie to average rating dictionary
def recommend_movies1(uid, user_to_movies, movie_to_genre, movie_to_avgRating):
    """Recommend up to three unrated movies from the user's favourite genre.

    Args:
        uid: User id to recommend for.
        user_to_movies: Mapping of user id to a list of ``(movie, rating)``
            tuples.
        movie_to_genre: Mapping of movie title to genre.
        movie_to_avgRating: Mapping of movie title to average rating.

    Returns:
        dict: Movie -> average rating for at most three movies of the user's
        top genre that the user has not rated, ordered from highest to lowest
        average rating. Returns ``None`` (after printing an error) when the
        user is unknown.
    """
    if uid not in user_to_movies:
        print('Error: User does not exist')
        return None

    top_genre = get_user_genre(uid, user_to_movies, movie_to_genre)

    # A set tolerates a movie rated more than once by the same user and
    # keeps the exclusion test O(1).
    already_rated = {movie for movie, _rating in user_to_movies[uid]}

    candidates = [
        (movie, movie_to_avgRating[movie])
        for movie, genre in movie_to_genre.items()
        if genre == top_genre
        and movie not in already_rated
        # A movie nobody rated has no average and cannot be ranked.
        and movie in movie_to_avgRating
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Slicing already copes with fewer than three candidates.
    return dict(candidates[:3])
    
    
    

In [65]:
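# Disabled: recommend_movies is only defined in a later cell, so this call
# fails on a fresh kernel; the output below is left over from an earlier run.
#print(recommend_movies('18', user_to_rating, movie_to_genre, movie_to_avgRating))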

{'Sudden Death (1995)': 3.33, 'GoldenEye (1995)': 3.0}


In [92]:
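# Disabled: recommend_movies is only defined in a later cell, so this call
# fails on a fresh kernel; the output below is left over from an earlier run.
#print(recommend_movies('6', user_to_rating, movie_to_genre, movie_to_avgRating))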

{'Sudden Death (1995)': 3.3333}


In [119]:
def recommend_movies(uid, user_to_rating, movie_to_genre, movie_to_avgRating):
    """Recommend up to three unrated movies from the user's favourite genre.

    Args:
        uid: User id to recommend for.
        user_to_rating: Mapping of user id to a list of ``(movie, rating)``
            tuples.
        movie_to_genre: Mapping of movie title to genre.
        movie_to_avgRating: Mapping of movie title to average rating.

    Returns:
        dict: Movie -> average rating for at most three movies of the user's
        top genre that the user has not rated, ordered from highest to lowest
        average rating. Returns ``None`` (after printing an error) when the
        user is unknown.
    """
    if uid not in user_to_rating:
        print('Error: User does not exist')
        return None

    top_choice = get_user_genre(uid, user_to_rating, movie_to_genre)

    # Everything the user has already rated, as a set for O(1) lookups.
    movies_watched = {movie for movie, _rating in user_to_rating[uid]}

    unseen = {}
    for movie, genre in movie_to_genre.items():
        if genre != top_choice or movie in movies_watched:
            continue
        # A movie nobody rated has no average and cannot be ranked.
        if movie in movie_to_avgRating:
            unseen[movie] = movie_to_avgRating[movie]

    ranked = sorted(unseen.items(), key=lambda item: item[1], reverse=True)

    # A fixed slice works for any number of candidates; the previous code
    # left the limit undefined whenever more than three were available.
    return dict(ranked[:3])

In [120]:
# Recommend unrated movies from the favourite genre of user '18'.
print(recommend_movies('18', user_to_rating, movie_to_genre, movie_to_avgRating))

{'Sudden Death (1995)': 3.3333, 'GoldenEye (1995)': 3.0}


In [121]:
# Recommend unrated movies from the favourite genre of user '6'.
print(recommend_movies('6', user_to_rating, movie_to_genre, movie_to_avgRating))

{}
