In [None]:
# from section1 task 9
def get_total_ratings(item_list, ni_dict):
    """Add up the ``ni_dict`` values of every id found in ``item_list``.

    Ids that are not keys of ``ni_dict`` are skipped. An id that occurs
    several times in ``item_list`` contributes once per occurrence.

    Args:
        item_list: Iterable of ids to look up.
        ni_dict: Mapping from id to a numeric value (presumably a per-movie
            rating count -- confirm against the caller).

    Returns:
        The accumulated total, or ``0`` when none of the ids is a key.
    """
    running = 0
    # Lazily yields only the values whose id is actually present.
    known_values = (ni_dict[movie] for movie in item_list if movie in ni_dict)
    for value in known_values:
        running = running + value
    return running

In [None]:
# from section1 task 9
def quick_sort(arr):
    """Return the records of ``arr`` ordered by ascending ``record[1]``.

    The middle element supplies the pivot key. Records are split into
    those below, equal to and above that key; the outer groups are sorted
    recursively and the three parts are concatenated.

    Args:
        arr: Sequence of indexable records whose element at index 1 is the
            sort key.

    Returns:
        ``arr`` itself when it holds fewer than two records, otherwise a
        new list in ascending key order. Records with equal keys keep
        their original relative order.
    """
    # Nothing to reorder: hand the input back untouched.
    if len(arr) < 2:
        return arr

    pivot_key = arr[len(arr) // 2][1]

    smaller = [rec for rec in arr if rec[1] < pivot_key]
    equal = [rec for rec in arr if rec[1] == pivot_key]
    larger = [
        rec for rec in arr
        if not (rec[1] < pivot_key or rec[1] == pivot_key)
    ]

    return quick_sort(smaller) + equal + quick_sort(larger)

In [None]:
# from section1 task 14
def manual_intersection_size(set1, set2):
    """Count the elements of ``set1`` that equal some element of ``set2``.

    Matching uses ``==`` on every pair, so no hashing is involved. Each
    element of ``set1`` is counted at most once, however many elements of
    ``set2`` it equals.

    Args:
        set1: Iterable whose elements are checked one by one.
        set2: Iterable that is scanned for a match for each element.

    Returns:
        The number of elements of ``set1`` that have a match in ``set2``.
    """
    return sum(
        1
        for candidate in set1
        # any() stops at the first equal element, like the original break.
        if any(candidate == other for other in set2)
    )


In [None]:
# helper function 
# Merge Sort implementation to sort by count
def merge_sort(arr):
    """Sort ``arr`` by splitting it in half and merging the sorted halves.

    The ordering itself is decided by ``merge``, which compares the
    element at index 1 of each record.

    Args:
        arr: Sliceable sequence of records.

    Returns:
        ``arr`` itself when it holds fewer than two records, otherwise the
        list produced by ``merge``.
    """
    size = len(arr)
    if size < 2:
        return arr

    half = size // 2
    # The left half is sorted before the right half, then both are merged.
    return merge(merge_sort(arr[:half]), merge_sort(arr[half:]))

def merge(left, right):
    """Merge two sequences ordered by ``record[1]`` into one ordered list.

    Args:
        left: Sequence of records already ascending by index 1.
        right: Sequence of records already ascending by index 1.

    Returns:
        A new list holding every record of both inputs. When two keys are
        equal, the record from ``left`` is placed first.
    """
    merged = []
    li = 0
    ri = 0
    left_len = len(left)
    right_len = len(right)

    while li < left_len and ri < right_len:
        from_left = left[li]
        from_right = right[ri]
        # "<=" sends ties to the left-hand record.
        if from_left[1] <= from_right[1]:
            merged.append(from_left)
            li += 1
        else:
            merged.append(from_right)
            ri += 1

    # At most one input still has records; the other slice is empty.
    merged.extend(left[li:])
    merged.extend(right[ri:])
    return merged


In [2]:
# ---- Helper functions ----

def common_items(u1_dict, u2_dict):
    """Return the keys of ``u1_dict`` that are also present in ``u2_dict``.

    Uses a membership test on ``u2_dict`` rather than scanning all of its
    keys for every key of ``u1_dict``, so the cost is linear in the size
    of ``u1_dict`` when ``u2_dict`` is a dict.

    Args:
        u1_dict: Mapping (or other iterable) whose keys are checked.
        u2_dict: Mapping (or other container) that is probed with ``in``.

    Returns:
        A list of the shared keys, in the iteration order of ``u1_dict``.
    """
    return [item for item in u1_dict if item in u2_dict]

In [None]:

# user_user cosine similarity
def cosine_similarity(u1_dict, u2_dict):
    """Cosine similarity between two rating dictionaries.

    The dot product runs over the keys both dictionaries share (as found
    by ``common_items``); each norm runs over all values of its own
    dictionary.

    Args:
        u1_dict: Mapping from key to numeric rating for the first user.
        u2_dict: Mapping from key to numeric rating for the second user.

    Returns:
        The similarity rounded to 4 decimals, or ``0.0`` when there are no
        shared keys or either squared norm is zero.
    """
    shared = common_items(u1_dict, u2_dict)
    if not shared:
        return 0.0

    dot_product = 0.0
    for key in shared:
        dot_product += u1_dict[key] * u2_dict[key]

    squared_norm_1 = 0.0
    for rating in u1_dict.values():
        squared_norm_1 += rating * rating

    squared_norm_2 = 0.0
    for rating in u2_dict.values():
        squared_norm_2 += rating * rating

    if squared_norm_1 != 0 and squared_norm_2 != 0:
        magnitude = (squared_norm_1 ** 0.5) * (squared_norm_2 ** 0.5)
        return round(dot_product / magnitude, 4)

    # A zero vector has no direction, so report no similarity.
    return 0.0


In [None]:
# helper function
def top_20_percent_desc(sorted_list):
    """Return the last 20% of ``sorted_list`` reversed, scores rounded.

    The number of entries kept is ``int(len(sorted_list) * 0.2)``, but
    never fewer than one. The caller is expected to pass the list sorted
    by ascending score so that the tail holds the highest scores.

    Args:
        sorted_list: List of ``(user_id, score)`` pairs. It is not
            modified.

    Returns:
        A new list of ``(user_id, round(score, 2))`` tuples, starting with
        the last entry of ``sorted_list``. Empty input gives ``[]``.
    """
    keep = max(int(len(sorted_list) * 0.2), 1)
    # Slicing copies the tail; reversed() then walks it back to front.
    best_first = reversed(sorted_list[-keep:])
    return [(uid, round(sim, 2)) for uid, sim in best_first]


In [None]:
# ---- Helper function to predict rating ----
def predict_rating(target_user, item, top_sim_users, user_ratings):
    """Predict a rating for ``item`` as a similarity-weighted average.

    Only neighbours that have a rating for ``item`` contribute. Each
    rating is weighted by the neighbour's similarity, and the result is
    normalised by the sum of the absolute similarities.

    Args:
        target_user: Not used by the computation; kept for the interface.
        item: Key of the item to predict.
        top_sim_users: Iterable of ``(user, similarity)`` pairs.
        user_ratings: Mapping from user to a mapping of item to rating.

    Returns:
        The prediction rounded to 2 decimals, or ``None`` when the
        normaliser is zero (for example, no neighbour rated ``item``).
    """
    weighted_sum = 0.0
    weight_total = 0.0

    for neighbour, sim in top_sim_users:
        neighbour_ratings = user_ratings.get(neighbour, {})
        if item not in neighbour_ratings:
            continue
        weighted_sum += sim * neighbour_ratings[item]
        # Absolute value keeps negative similarities from cancelling the
        # normaliser.
        weight_total += abs(sim)

    if weight_total == 0:
        return None

    return round(weighted_sum / weight_total, 2)


In [None]:
# Mean-Centered Cosine Similarity function
def mean_centered_cosine(u1_dict, u2_dict, u1_mean, u2_mean):
    """Cosine similarity of two rating dicts after subtracting given means.

    Only the keys both dictionaries share (as found by ``common_items``)
    take part, in the dot product as well as in both norms.

    Args:
        u1_dict: Mapping from key to numeric rating for the first user.
        u2_dict: Mapping from key to numeric rating for the second user.
        u1_mean: Value subtracted from every rating of ``u1_dict``.
        u2_mean: Value subtracted from every rating of ``u2_dict``.

    Returns:
        The unrounded similarity, or ``0.0`` when there are no shared keys
        or either squared norm is zero.
    """
    shared = common_items(u1_dict, u2_dict)
    if not shared:
        return 0.0

    # Pair up the deviations from the supplied means for each shared key.
    offsets = [
        (u1_dict[key] - u1_mean, u2_dict[key] - u2_mean) for key in shared
    ]

    dot = 0.0
    squared_norm_1 = 0.0
    squared_norm_2 = 0.0
    for offset_1, offset_2 in offsets:
        dot += offset_1 * offset_2
        squared_norm_1 += offset_1 ** 2
        squared_norm_2 += offset_2 ** 2

    if squared_norm_1 != 0 and squared_norm_2 != 0:
        return dot / ((squared_norm_1 ** 0.5) * (squared_norm_2 ** 0.5))

    return 0.0


In [None]:
def pearson_correlation(u1_dict, u2_dict, u1_mean, u2_mean):
    """Pearson-style correlation of two rating dicts around given means.

    Deviations are taken from the supplied ``u1_mean`` / ``u2_mean``, not
    from means recomputed over the shared keys, and only the keys both
    dictionaries share (as found by ``common_items``) take part.

    Args:
        u1_dict: Mapping from key to numeric rating for the first user.
        u2_dict: Mapping from key to numeric rating for the second user.
        u1_mean: Value subtracted from every rating of ``u1_dict``.
        u2_mean: Value subtracted from every rating of ``u2_dict``.

    Returns:
        The correlation rounded to 2 decimals, or ``0.0`` when fewer than
        two keys are shared or the denominator is zero.
    """
    com = common_items(u1_dict, u2_dict)
    if len(com) < 2:  # PCC undefined or zero with less than 2 common items
        return 0.0

    # The plain deviation sums were accumulated before but never read, so
    # only the three sums the formula needs are kept.
    sum1_sq = sum2_sq = sum_prod = 0.0
    for item in com:
        dev1 = u1_dict[item] - u1_mean
        dev2 = u2_dict[item] - u2_mean
        sum1_sq += dev1 ** 2
        sum2_sq += dev2 ** 2
        sum_prod += dev1 * dev2

    denominator = (sum1_sq * sum2_sq) ** 0.5
    if denominator == 0:
        return 0.0

    return round(sum_prod / denominator, 2)

In [None]:
def mean_center(item_dict):
    """Return a copy of ``item_dict`` with the mean value subtracted.

    Args:
        item_dict: Mapping from key to numeric rating.

    Returns:
        A new dict with the same keys, each value reduced by the mean of
        all values. An empty input yields an empty dict instead of raising
        ``ZeroDivisionError``.
    """
    # No ratings means no mean to subtract; avoid dividing by zero.
    if not item_dict:
        return {}

    mean_rating = sum(item_dict.values()) / len(item_dict)
    return {key: rating - mean_rating for key, rating in item_dict.items()}


In [None]:
def common_users(i1_dict, i2_dict):
    """Return the keys of ``i1_dict`` that also appear in ``i2_dict``.

    Args:
        i1_dict: Mapping whose keys are checked.
        i2_dict: Mapping (or other container) probed with ``in``.

    Returns:
        A list of the shared keys, in the iteration order of ``i1_dict``.
    """
    return [user for user in i1_dict if user in i2_dict]


In [None]:
def cosine_similarity_items(i1_dict_raw, i2_dict_raw):
    """Cosine similarity of two item rating dicts after mean-centering.

    Each dictionary is centered on its own mean via ``mean_center``. The
    dot product runs over the keys both share (``common_users``), while
    each norm runs over all centered values of its own dictionary.

    Args:
        i1_dict_raw: Mapping from user to raw rating for the first item.
        i2_dict_raw: Mapping from user to raw rating for the second item.

    Returns:
        The similarity rounded to 2 decimals, or ``0.0`` when no keys are
        shared or either norm is zero.
    """
    centered_1 = mean_center(i1_dict_raw)
    centered_2 = mean_center(i2_dict_raw)

    shared_users = common_users(centered_1, centered_2)
    if not shared_users:
        return 0.0

    dot = 0.0
    for user in shared_users:
        dot += centered_1[user] * centered_2[user]

    length_1 = sum(dev * dev for dev in centered_1.values()) ** 0.5
    length_2 = sum(dev * dev for dev in centered_2.values()) ** 0.5

    if length_1 != 0 and length_2 != 0:
        return round(dot / (length_1 * length_2), 2)

    # All ratings equal their mean: the centered vector is zero.
    return 0.0


In [None]:
def pcc_similarity_items(i1_dict, i2_dict):
    """Pearson correlation of two item rating dicts over their shared keys.

    Means and deviations are computed only from the ratings of the keys
    both dictionaries share (as found by ``common_users``).

    Args:
        i1_dict: Mapping from user to rating for the first item.
        i2_dict: Mapping from user to rating for the second item.

    Returns:
        The correlation rounded to 2 decimals, or ``0.0`` when fewer than
        two keys are shared or either set of ratings has zero spread.
    """
    shared = common_users(i1_dict, i2_dict)
    count = len(shared)
    if count < 2:
        # A single shared point cannot define a correlation.
        return 0.0

    ratings_1 = [i1_dict[user] for user in shared]
    ratings_2 = [i2_dict[user] for user in shared]

    mean_1 = sum(ratings_1) / count
    mean_2 = sum(ratings_2) / count

    cross_sum = sum(
        (a - mean_1) * (b - mean_2) for a, b in zip(ratings_1, ratings_2)
    )
    spread_1 = sum((a - mean_1) ** 2 for a in ratings_1) ** 0.5
    spread_2 = sum((b - mean_2) ** 2 for b in ratings_2) ** 0.5

    if spread_1 == 0 or spread_2 == 0:
        return 0.0

    return round(cross_sum / (spread_1 * spread_2), 2)
