<a href="https://colab.research.google.com/github/yckamra/MovieLens25m-Recommender/blob/main/Benchmarking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
def precision_k(predictionList, actualList, k):
  """Return precision@k: hits among the top-k predictions, divided by k.

  Both lists are de-duplicated first (keeping the first occurrence of each
  movie), and only the first k unique predictions are considered. The hit
  count is divided by k itself, even if fewer than k unique predictions exist.

  Returns 0 when k <= 0, which avoids dividing by zero.
  """
  # dict.fromkeys drops duplicates while keeping the original ordering
  top_k = list(dict.fromkeys(predictionList))[:k]
  relevant = list(dict.fromkeys(actualList))

  if k <= 0:
    return 0

  recommended = set(top_k)
  hits = sum(1 for movie in relevant if movie in recommended)

  return hits / k

In [None]:
def recall_k(predictionList, actualList, k):
  """Return recall@k: hits among the top-k predictions, divided by the number of unique actual movies.

  Both lists are de-duplicated first (keeping the first occurrence of each
  movie), and only the first k unique predictions are considered.

  Returns 0 when actualList has no entries, which avoids dividing by zero.
  """
  # dict.fromkeys drops duplicates while keeping the original ordering
  top_k = list(dict.fromkeys(predictionList))[:k]
  relevant = list(dict.fromkeys(actualList))

  if not relevant:
    return 0

  recommended = set(top_k)
  hits = len([movie for movie in relevant if movie in recommended])

  return hits / len(relevant)

In [None]:
def f1_k(predictionList, actualList, k):
  """Return F1@k, the harmonic mean of precision_k and recall_k.

  Returns 0 when precision and recall are both zero (for example when the
  top-k predictions share nothing with actualList), because the harmonic
  mean would otherwise divide by zero.
  """
  precision = precision_k(predictionList, actualList, k)
  recall = recall_k(predictionList, actualList, k)

  denominator = precision + recall

  # Resolves division by zero
  if denominator == 0:
    return 0

  f1_score = 2 * ((precision * recall) / denominator)

  return f1_score

In [None]:
def NDCG_k(predictionList, actualList, k): # Normalized Discounted Cumulative Gain
  """Return binary-relevance NDCG@k for one ranked prediction list.

  A predicted movie counts as relevant (gain 1) when it appears in
  actualList, otherwise its gain is 0. The gain at 0-based rank i is
  discounted by log2(i + 2). The result is DCG divided by the DCG of an
  ideal ranking that places min(k, number of unique actual movies) relevant
  movies first, so the value lies between 0 and 1.

  Duplicates are removed from both lists (keeping the first occurrence), and
  only the first k unique predictions are scored.

  Returns 0 when k <= 0 or when actualList has no entries, because the ideal
  DCG would be zero.
  """
  import math # Local import so this cell does not rely on a module-level import

  # Resolves division by zero (an empty ideal ranking has a DCG of 0)
  if k <= 0:
    return 0

  noDupesPredictionList = list(dict.fromkeys(predictionList))[:k]
  relevant = dict.fromkeys(actualList) # Order-preserving de-duplication with fast membership tests

  if not relevant:
    return 0

  dcg = sum(
    1 / math.log2(rank + 2)
    for rank, movie in enumerate(noDupesPredictionList)
    if movie in relevant
  )

  # Ideal ranking: every available relevant movie occupies the leading positions
  ideal_hits = min(k, len(relevant))
  idcg = sum(1 / math.log2(rank + 2) for rank in range(ideal_hits))

  return dcg / idcg

In [None]:
def contains_relevant_item(predictionList, actualList, k):
  """Return True if any movie in actualList is among the first k unique predictions.

  predictionList is de-duplicated (keeping the first occurrence) before the
  top-k slice is taken. Returns False when there is no overlap.
  """
  # dict.fromkeys drops duplicates while keeping the original ordering
  recommended = set(list(dict.fromkeys(predictionList))[:k])
  relevant = dict.fromkeys(actualList)

  return any(movie in recommended for movie in relevant)

In [None]:
def mean_precision_k(allUsers, allPredictions, k): # allUsers is a dictionary with (userId, userObject) key-value pairs and allPredictions is a dictionary with (userId, predictionList)
  """Return the average precision_k over every user in allUsers.

  Each userId in allUsers is also used to look up that user's prediction
  list in allPredictions; the user's actual movies are read from the user
  object's `.movies` attribute.

  Returns 0 when allUsers is empty.
  """
  user_count = 0
  precision_total = 0

  for user_id in allUsers:
    score = precision_k(allPredictions[user_id], allUsers[user_id].movies, k)
    precision_total += score
    user_count += 1

  # Resolves division by zero when there are no users
  return precision_total / user_count if user_count != 0 else 0

In [None]:
def mean_recall_k(allUsers, allPredictions, k): # allUsers is a dictionary with (userId, userObject) key-value pairs
  """Return the average recall_k over every user in allUsers.

  Each userId in allUsers is also used to look up that user's prediction
  list in allPredictions; the user's actual movies are read from the user
  object's `.movies` attribute.

  Returns 0 when allUsers is empty.
  """
  user_count = 0
  recall_total = 0

  for user_id in allUsers:
    score = recall_k(allPredictions[user_id], allUsers[user_id].movies, k)
    recall_total += score
    user_count += 1

  # Resolves division by zero when there are no users
  return recall_total / user_count if user_count != 0 else 0

In [None]:
def mean_f1_k(allUsers, allPredictions, k): # allUsers is a dictionary with (userId, userObject) key-value pairs
  """Return the average f1_k over every user in allUsers.

  Each userId in allUsers is also used to look up that user's prediction
  list in allPredictions; the user's actual movies are read from the user
  object's `.movies` attribute.

  Returns 0 when allUsers is empty.
  """
  user_count = 0
  f1_total = 0

  for user_id in allUsers:
    score = f1_k(allPredictions[user_id], allUsers[user_id].movies, k)
    f1_total += score
    user_count += 1

  # Resolves division by zero when there are no users
  return f1_total / user_count if user_count != 0 else 0

In [None]:
def mean_NDCG_k(allUsers, allPredictions, k): # allUsers is a dictionary with (userId, userObject) key-value pairs
  """Placeholder for the mean NDCG@k across users; not implemented yet.

  Currently ignores all of its arguments and always returns None.
  """
  # TODO: average the per-user NDCG@k once that metric is available
  return None

In [None]:
def total_hit_rate(allUsers, allPredictions, k): # allUsers is a dictionary with (userId, userObject) key-value pairs
  """Count the users whose top-k predictions contain at least one of their movies.

  Returns a tuple (hits, users): the number of users for whom
  contains_relevant_item is truthy, and the total number of users iterated.
  Despite the function name, the first value is a raw count, not a ratio.
  """
  hits = 0
  users_seen = 0

  for user_id in allUsers:
    was_hit = contains_relevant_item(allPredictions[user_id], allUsers[user_id].movies, k)
    hits += 1 if was_hit else 0
    users_seen += 1

  return hits, users_seen

In [None]:
def evaluate_model(allUsers, allPredictions, k): # allUsers is a dictionary with (userId, userObject) key-value pairs and predictions is a 2D matrix
  """Run every benchmark metric at cutoff k and return the results as one tuple.

  Returns (precision, recall, f1, NDCG, hit_rate, num_users), where the first
  four values come from the mean_*_k functions and the last two come from
  total_hit_rate.

  NOTE(review): the signature comment calls the predictions a 2D matrix, but
  the helper functions index allPredictions by userId -- confirm the expected
  type with the caller.
  """
  # Metrics are computed in the same order as they appear in the returned tuple
  mean_scores = (
    mean_precision_k(allUsers, allPredictions, k),
    mean_recall_k(allUsers, allPredictions, k),
    mean_f1_k(allUsers, allPredictions, k),
    mean_NDCG_k(allUsers, allPredictions, k),
  )
  hits, user_count = total_hit_rate(allUsers, allPredictions, k)

  return (*mean_scores, hits, user_count)