In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix, vstack
import pickle
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# NOTE: pickle.load can execute arbitrary code; only load asset files you trust.

# Sparse matrix that memory_based stacks the new user's row onto.
with open('assets/csr.pkl', "rb") as csr_file:
    csr = pickle.load(csr_file)

# Course identifiers; memory_based matches user input against these and
# indexes them by matrix column position.
with open('assets/courses.pkl', "rb") as courses_file:
    courses = pickle.load(courses_file)

# Course table; memory_based reads its 'course' column and descriptive columns.
df = pd.read_csv('assets/fw.csv')

In [3]:
def memory_based(user_input, top_n=10):
    """Recommend offered courses with memory-based collaborative filtering.

    The courses in ``user_input`` are turned into a new row that is stacked
    under the module-level ``csr`` matrix (its columns are matched
    positionally with the module-level ``courses``). Every course gets a
    predicted rating from the similarity-weighted, mean-centred ratings of
    all rows, and the best-rated courses the user has not taken and that
    appear in ``df['course']`` are returned.

    Parameters
    ----------
    user_input : sequence of str
        Courses the user has already taken. Every entry must be present in
        ``courses``.
    top_n : int, default 10
        Number of recommendations requested; values above 10 are rejected.

    Returns
    -------
    pandas.DataFrame or None
        One row per recommended course, best first, with the columns
        'course', 'Term', 'Subject', 'Course Title', 'description',
        'credits' and 'requirements_distribution'. ``None`` is returned
        (after printing a message) when ``top_n`` exceeds 10, when no course
        was entered, or when any entered course is unknown.
    """
    if top_n > 10:
        print('Please enter the number of requested recommendations fewer than 10.')
        return None

    if len(user_input) == 0:
        print('Please enter at least one course.')
        return None

    # Validate every entry up front, not just the first one.
    if any(course not in courses for course in user_input):
        print('At least one of the courses entered not valid.')
        return None

    # Append the new user as the last row of the user-course matrix.
    taken_mask = np.isin(courses, user_input)
    uc_mat = vstack([csr, taken_mask]).tocsr()

    # Only the new user's similarities are needed, so compare that single
    # row against all rows instead of building the full pairwise matrix.
    sims = cosine_similarity(uc_mat[-1:], uc_mat).ravel()

    # Mean rating of every row (the new user's mean is the last entry).
    row_means = np.asarray(uc_mat.mean(axis=1)).ravel()

    # Numerator of V-hat(aj): sum_u sim(a, u) * (r_uj - mean_u).
    # Expanded as sims @ R - sims . means so the sparse matrix is never
    # converted to a dense mean-centred copy.
    v_num = np.asarray(uc_mat.T @ sims).ravel() - sims @ row_means

    # Predicted rating of each course for the new user.
    pred_rating = v_num / sims.sum() + row_means[-1]

    # Rank only the courses the user has not taken, keeping their positions
    # in `courses`; argsort on the filtered ratings alone would yield
    # positions inside the filtered array, not course indices.
    candidate_idx = np.flatnonzero(~taken_mask)
    ranked_idx = candidate_idx[np.argsort(-pred_rating[candidate_idx])]

    # Keep the best-rated courses that are actually offered.
    offered_courses = set(df['course'])
    offered = []
    for idx in ranked_idx:
        name = courses[idx]
        if name in offered_courses:
            offered.append(name)
            if len(offered) >= top_n:
                break

    rec_df = df.set_index('course', drop=True).loc[offered].reset_index()
    return rec_df[['course', 'Term', 'Subject', 'Course Title', 'description',
                   'credits', 'requirements_distribution']]

In [None]:
# Ask for up to ten previously taken courses, one per prompt.
user_input = []

for i in range(10):
    course = input(f'Course {i+1}: Please enter a course you have taken. One course at a time.').strip()
    # memory_based matches entries against `courses` exactly, so surrounding
    # whitespace is removed and blank answers are skipped rather than stored.
    if course:
        user_input.append(course)

In [None]:
# Bare last expression of the cell: displays whatever memory_based returns
# for the courses collected above, using the default top_n of 10.
memory_based(user_input)