# Update (12.13)
### read_date(file_path)
* input: file_path
* output: 
     - csr: csr matrix of user profile data
     - courses: list of courses
     
### memory_based(csr, courses, user_input, top_n = 10, random_state=42)
* input: 
    - csr
    - courses
    - user_input
    - csr, courses and the other inputs can be assigned inside the function. 
    
* output:
    - recommended courses
    
--------


In [12]:
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix, vstack
import pickle
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

### Load csr and courses

In [6]:
with open('assets/csr.pkl', "rb") as fIn:
    csr = pickle.load(fIn)
with open('assets/courses.pkl', "rb") as fIn:
    courses = pickle.load(fIn)

## Matrix-based collaborative filtering

In [56]:
def memory_based(user_input, top_n = 10):
    
    for course in user_input:
        if course in courses:
            # Memory-based collaborative filtering
            user_idx = -1

            user_row = np.isin(courses, user_input)
            uc_mat = vstack([csr, user_row]).tocsr()
            # Normalize user-course matrix
            norm_uc_mat = uc_mat - uc_mat.mean(axis = 1).reshape(-1,1)

            # Cosine similarity of raw matrix
            cossim = cosine_similarity(uc_mat)

            # Calculate the numerator of V-hat(aj)
            v_num = np.dot(cossim[user_idx,:], norm_uc_mat)

            # Sum of V-hat(aj) numerator
            v_num_sum = v_num.sum(axis = 0)

            # V_hat(aj)
            v_hat = v_num_sum/(cossim[user_idx,:].sum())

            #Predicted rating of each course
            pred_rating = np.add(v_hat, uc_mat.mean(axis = 1).reshape(-1,1)[user_idx])

            # User vector
            user_vec = uc_mat[user_idx].toarray().flatten()

            # User's favorite course indices
            user_fav_idx = np.argsort(-user_vec)

            # User's favorite courses
            user_fav = [courses[i] for i in user_fav_idx[:top_n]]
            #print(f"Top {top_n} favorite courses:")
            #print(user_fav)

            # Flatten pred_rating
            pred_rating = np.asarray(pred_rating).flatten()

            # Argsort the rating except the courses taken by the user
            recommend_index = np.argsort(-pred_rating[user_vec == 0])

            # Recommend top courses
            top_courses = [courses[i] for i in recommend_index[:top_n]]

            print(f"Top {top_n} recommended courses:")
            #print(top_courses)

            #print()

            # RMSE between true & predicted
            rmse = np.sqrt(np.sum((user_vec[user_vec != 0] - pred_rating[user_vec != 0]) ** 2))
            #print(f"RMSE = {rmse}")
            
            return top_courses, rmse
            
        else:
            return print('At least one of the courses entered not valid.')      

In [57]:
np.random.choice(courses, size = 10, replace = False)

array(['SLAVI 313', 'CELL 112', 'HISTOR 320', 'HISTOR 227', 'MIDEAS 381',
       'WG 297', 'ASIANLA 303', 'LATINOA 315', 'AMA 498', 'MAT 175'],
      dtype='<U11')

In [60]:
user_input = ['SLAVI 313', 'CELL 112', 'HISTOR 320', 'HISTOR 227', 'MIDEAS 381',
       'WG 297', 'ASIANLA 303', 'LATINOA 315', 'AMA 498', 'MAT 175']

In [61]:
memory_based(user_input)

Top 10 recommended courses:


(['EECS 367',
  'EEC 481',
  'EECS 281',
  'EEC 280',
  'EEC 270',
  'MAT 403',
  'MAT 422',
  'MAT 451',
  'MAT 425',
  'MAT 480'],
 2.352093525317244)