# Install and load necesary packages

In [1]:
# Please don't change this cell

import pandas as pd
import numpy as np  

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Please don't change this cell
df = pd.read_csv('ml-100k/u.data', names=['user_id', 'item_id', 'rating', 'timestamp'], sep='\t')

df.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


# Split dataset
## Random Train and Test Split

In [3]:
from sklearn.model_selection import train_test_split

n_users = df.user_id.unique().shape[0]
n_items = df.item_id.unique().shape[0]
print(str(n_users) + ' users')
print(str(n_items) + ' items')

train_df, test_df = train_test_split(df, test_size=0.2, random_state = 10)
train_df, test_df

# Training Dataset
train_ds = np.zeros((n_users, n_items))
for row in train_df.itertuples():
    train_ds[row[1]-1, row[2]-1] = row[3]
#train_ds = pd.DataFrame(train_ds)

# Testing Dataset
test_ds = np.zeros((n_users, n_items))
for row in test_df.itertuples():
    test_ds[row[1]-1, row[2]-1] = row[3]
#test_ds = pd.DataFrame(test_ds)

print("Construct the rating matrix based on train_df:")
print(train_ds)

print("Construct the rating matrix based on test_df:")
print(test_ds)

943 users
1682 items
Construct the rating matrix based on train_df:
[[0. 3. 4. ... 0. 0. 0.]
 [4. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [5. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 5. 0. ... 0. 0. 0.]]
Construct the rating matrix based on test_df:
[[5. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


# Utils

In [None]:
EPSILON = 1e-9

def evaluate(test_ds, predicted_ds):
    '''
    Function for evaluating on MAE and RMSE
    '''
    # MAE
    mask_test_ds = test_ds > 0
    MAE = np.sum(np.abs(test_ds[mask_test_ds] - predicted_ds[mask_test_ds])) / np.sum(mask_test_ds.astype(np.float32))

    # RMSE
    RMSE = np.sqrt(np.sum(np.square(test_ds[mask_test_ds] - predicted_ds[mask_test_ds])) / np.sum(mask_test_ds.astype(np.float32)))

    return MAE, RMSE

# Your Solution

In [None]:


MAE = 0 # 0 is an intial value, you need to update this with the actual perofrmance of your implementation.
RMSE = 0 # 0 is an intial value, you need to update this with the actual perofrmance of your implementation.



# Setting the predefined lambda value
# As we do personalised weighted slope one, so keeping lamda close to zero will be good
pre_defined_lambda = 0.3

# Calculate similarities between current user and all users in train_ds and storing in 'similarities' array
similar_users = np.zeros(train_ds.shape[0])
# for now setting current user to user 0
curr_user = 0
current_rating = train_ds[curr_user]
for i in range(train_ds.shape[0]):
    # Skipping the loop when i is equal to current user.
    if i != curr_user:
        rating = train_ds[i]
        # Finding the common ratings between 2 users when their rating is greater than 0.
        common_rating = np.logical_and(current_rating > 0, rating > 0)
        # Calculating the similarity when the common rating is greater than 0.
        if np.sum(common_rating) > 0:
            # Subtracting the rating value with the mean of commononly rated items to find Pearson coefficient
            mean_current_rating = current_rating[common_rating] - np.mean(current_rating[common_rating])
            mean_user_rating = rating[common_rating] - np.mean(rating[common_rating])
            # Apply centered cosine formula
            similarity = np.sum(mean_current_rating * mean_user_rating) / np.sqrt(np.sum(mean_current_rating ** 2) * np.sum(mean_user_rating ** 2))
            similar_users[i] = similarity

            
# Replacing the Nan Values with zero for calculation
similar_users = np.where(np.isnan(similar_users), 0, similar_users)  


# Initializing the deviation and cardinality matrix with zero
dev = np.zeros((n_items, n_items))
cardinality_matrix = np.zeros((n_items, n_items))

for item_j in range(n_items):
    for item_i in range(n_items):
        # Skipping the loop when item_i = item_j as it will be 0.
        if item_i != item_j:
            # Finding the common users
            common_users = np.logical_and(train_ds[:, item_i] > 0, train_ds[:, item_j] > 0)
            # cardinality is total number of users rated same item
            cardinality = np.sum(common_users)
            if cardinality > 0:
                rating_j = train_ds[common_users, item_j]
                rating_i = train_ds[common_users, item_i]
                # Calculating the first part of the formula representing the Slope One Deviation(item-item based)
                lhs = pre_defined_lambda * np.sum((rating_j - rating_i) / cardinality)
                # Calculating the second part of the formula representing deviation for personalize recommendation
                rhs = (1 - pre_defined_lambda) * (np.sum((rating_j - rating_i) * 2 ** (similar_users[np.where(common_users)[0]])) / (np.sum(2**(similar_users[np.where(common_users)[0]])) * cardinality))

                # Combining both the parts and storing in the dev matrix for predictions
                dev[item_j, item_i] = lhs + rhs
                
                cardinality_matrix[item_j, item_i] = cardinality
            else:
                # when no cardinality is 0
                dev[item_j, item_i] = 0.0
                cardinality_matrix[item_j, item_i] = 0.0

# Perform predictions
# predict for user x item
pred_test = np.zeros((n_users,n_items))
for i in range(n_users):
    for j in range(n_items):
        items = np.where(np.logical_and(train_ds[i] > 0, cardinality_matrix[j] > 0))[0]
        if len(items) > 0:
            # prediction formula
            prediction = np.sum((dev[items, j] + train_ds[i, items]) * cardinality_matrix[items, j]) / np.sum(cardinality_matrix[items, j])
            pred_test[i, j] = prediction

MAE, RMSE = evaluate(test_ds, pred_test)


In [7]:
# Please don't change this cell

print("===================== The MAE and RMSE of Your Implementation =====================")
print("MAE: {}, RMSE: {}" .format(MAE, RMSE))

MAE: 0.8938467172315683, RMSE: 1.1394117893501086
