<a href="https://colab.research.google.com/github/utkarsshsingh/Capstone-/blob/master/Electronics_Ratings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem.
drive.mount('/content/gdrive/')
# Dataset directory, given as an absolute path under the mount point so that
# later reads do not depend on the kernel's current working directory.
data_path = '/content/gdrive/My Drive/Capstone Project/Codes/datasets/'

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [0]:
import pandas as pd
import numpy as np
from scipy.optimize import fmin_cg

# Turn the dataset directory into the full path of the ratings CSV.
# The guard keeps this cell idempotent: re-running it must not append the
# file name a second time.
RATINGS_FILE = 'ratings_Electronics.csv'
if not data_path.endswith(RATINGS_FILE):
    data_path = data_path + RATINGS_FILE

In [0]:
# Only use the first N_ROWS rating rows (rows, not items) of the CSV.
# Try more rows if you want, but more rows will cost you more time to run the code.
N_ROWS = 10000

# `nrows` stops parsing after N_ROWS lines instead of loading the whole file
# and discarding everything past the slice.
data_table = pd.read_csv(data_path,
                         usecols=[0, 1, 2],
                         names=['user_id', 'item_id', 'rating'],
                         header=None,
                         nrows=N_ROWS)

# User-by-item rating matrix; unrated pairs are NaN. If a user rated the same
# item more than once, the highest rating is kept.
df_ratings = pd.pivot_table(data_table,
                            index='user_id',
                            columns='item_id',
                            values='rating',
                            aggfunc='max')

In [0]:
# Preview the first rows of the raw (user_id, item_id, rating) table.
data_table.head()

Unnamed: 0,user_id,item_id,rating
0,AKM1MP6P0OYPR,132793040,5.0
1,A2CX7LUOHB2NDG,321732944,5.0
2,A2NWSAGRHCP8N5,439886341,1.0
3,A2WNBOD3WNDNKT,439886341,3.0
4,A1GI0U4ZRJA8WN,439886341,1.0


In [0]:
# (number of distinct users, number of distinct items) in the pivoted rating matrix.
df_ratings.shape

(9826, 1305)

In [0]:
# The following functions factorize the df_ratings matrix into the U and I matrices.
# U and I are the user-feature matrix and the item-feature matrix, respectively.
# You do not need to dive deep into the technical details!
def normalize_ratings(ratings):
    """Center every column of ``ratings`` on that column's mean, ignoring NaNs.

    Returns a ``(centered_ratings, column_means)`` pair, where ``column_means``
    is the NaN-aware mean taken along axis 0.
    """
    column_means = np.nanmean(ratings, axis=0)
    centered_ratings = ratings - column_means
    return centered_ratings, column_means


def cost(X, *args):
    """Regularized squared-error objective for the matrix factorization.

    ``X`` is the flat parameter vector: the user factors
    (``num_users * num_features`` values) followed by the item factors.
    ``args`` unpacks to ``(num_users, num_products, num_features, ratings,
    mask, regularization_amount)``.

    Returns half the sum of squared masked prediction errors plus
    ``regularization_amount / 2`` times the sum of squares of each factor matrix.
    """
    (num_users, num_products, num_features,
     ratings, mask, regularization_amount) = args

    # Split the flat vector back into the two factor matrices.
    split = num_users * num_features
    user_factors = X[:split].reshape(num_users, num_features)
    item_factors = X[split:].reshape(num_products, num_features)

    # Multiplying by the mask zeroes the error wherever mask is 0/False.
    residual = mask * (np.dot(user_factors, item_factors.T) - ratings)
    data_term = np.sum(np.square(residual)) / 2

    half_lambda = regularization_amount / 2.0
    item_penalty = half_lambda * np.sum(np.square(item_factors))
    user_penalty = half_lambda * np.sum(np.square(user_factors))

    return data_term + item_penalty + user_penalty


def gradient(X, *args):
    """Gradient of ``cost`` with respect to the flat parameter vector ``X``.

    ``X`` holds the user factors (``num_users * num_features`` values)
    followed by the item factors. ``args`` unpacks to ``(num_users,
    num_products, num_features, ratings, mask, regularization_amount)``.

    Returns a flat array laid out like ``X``: the user-factor gradient
    followed by the item-factor gradient.
    """
    num_users, num_products, num_features, ratings, mask, regularization_amount = args

    split = num_users * num_features
    P = X[:split].reshape(num_users, num_features)
    Q = X[split:].reshape(num_products, num_features)

    # The masked prediction error is shared by both gradients, so compute the
    # (num_users x num_products) product once instead of once per gradient.
    error = mask * (np.dot(P, Q.T) - ratings)

    P_grad = np.dot(error, Q) + (regularization_amount * P)
    Q_grad = np.dot(error.T, P) + (regularization_amount * Q)

    return np.append(P_grad.ravel(), Q_grad.ravel())


def low_rank_matrix_factorization(ratings, mask=None, num_features=15, regularization=0.01, maxiter=1000):
    """Factorize a user-by-item rating matrix into two low-rank factor matrices.

    Parameters
    ----------
    ratings : pandas.DataFrame or 2-D array-like
        Rating matrix with one row per user and one column per item; NaN marks
        a missing rating.
    mask : 2-D array-like, optional
        Multiplied element-wise into the prediction error, so entries that are
        0/False are ignored. Defaults to "rating is not NaN".
    num_features : int
        Number of latent features per user and per item.
    regularization : float
        L2 regularization strength applied to both factor matrices.
    maxiter : int
        Iteration cap handed to ``scipy.optimize.fmin_cg``.

    Returns
    -------
    (P, Q) : tuple of ndarray
        ``P`` has shape ``(num_users, num_features)`` and ``Q`` has shape
        ``(num_features, num_items)``, so ``P @ Q`` is the predicted matrix.
    """
    # Accept a DataFrame (the original calling convention) or any array-like.
    ratings = np.asarray(getattr(ratings, "values", ratings), dtype=float)
    num_users, num_products = ratings.shape

    if mask is None:
        mask = ~np.isnan(ratings)

    # Missing ratings become 0; the mask keeps them out of the error term.
    ratings = np.nan_to_num(ratings)

    # A local generator seeded with 0 draws the same initial values as
    # np.random.seed(0) followed by np.random.randn, but leaves the global
    # NumPy random state untouched.
    rng = np.random.RandomState(0)
    P = rng.randn(num_users, num_features)
    Q = rng.randn(num_products, num_features)

    # The optimizer works on one flat vector: user factors, then item factors.
    initial = np.append(P.ravel(), Q.ravel())

    args = (num_users, num_products, num_features, ratings, mask, regularization)

    X = fmin_cg(cost, initial, fprime=gradient, args=args, maxiter=maxiter)

    split = num_users * num_features
    nP = X[:split].reshape(num_users, num_features)
    nQ = X[split:].reshape(num_products, num_features)

    return nP, nQ.T

# Evaluation metric
def RMSE(real, predicted):
    """Root-mean-square error between ``real`` and ``predicted``.

    Positions where the squared difference is NaN are left out of the mean.
    """
    squared_error = np.square(real - predicted)
    mean_squared_error = np.nanmean(squared_error)
    return np.sqrt(mean_squared_error)

In [0]:
# Factorize the rating matrix: U holds one row of 10 latent features per user,
# I holds one column of 10 latent features per item.
# NOTE(review): the recorded output reports 1000 iterations, which equals the
# optimizer's maxiter cap, so the run likely stopped before converging — confirm.
U, I =  low_rank_matrix_factorization(df_ratings, num_features=10, regularization=0.1)

         Current function value: 915.437383
         Iterations: 1000
         Function evaluations: 1505
         Gradient evaluations: 1505


In [0]:
# Rebuild the full user-by-item matrix of predicted ratings from the two factor matrices.
predict_ratings = U @ I

In [0]:
# Sanity check: the prediction matrix should have the same shape as df_ratings.
predict_ratings.shape

(9826, 1305)

In [0]:
# The factorization returns I as (num_features, num_items); flip it so that
# each row holds one item's features. The shape guard keeps this cell
# idempotent: re-running it will not transpose I back again.
if I.shape[0] == U.shape[1]:
    I = np.transpose(I)

In [0]:
# After the transpose: (number of items, number of latent features).
I.shape

(1305, 10)

In [0]:
# The following cells find the top 5 items most similar to item_id 0321732944.
# Start by taking that item's row of latent features.
# NOTE(review): row 1 is assumed to be item 0321732944, i.e. the second column
# of df_ratings. The index is positional, so it changes if the data subset
# changes — confirm before reusing.
item_features = I[1] # 0321732944

In [0]:
# L1 (Manhattan) distance from every item's feature row to the reference item's features.
distances = np.abs(I - item_features).sum(axis=1)

In [0]:
# Flip the rating table so that each row is an item and each column is a user.
item_as_row = df_ratings.T


In [0]:
# Attach each item's distance as an extra column next to the per-user rating columns.
# This relies on the rows of I being in the same item order as the columns of df_ratings.
item_as_row['distances'] = distances

In [0]:
# Order the items from nearest to farthest, then keep positions 1-5.
# Position 0 is skipped: the reference item has distance 0 to itself, so it is
# expected to sort first.
ranked_items = item_as_row.sort_values('distances')
top5_similiarity = ranked_items.iloc[1:6]

In [0]:
# Show the five nearest items; the last column ('distances') is the one of interest,
# the remaining columns are the per-user ratings carried over from df_ratings.
top5_similiarity

user_id,A00766851QZZUBOVF4JFT,A01255851ZO1U93P8RKGE,A0293130VTX2ZXA70JQS,A030530627MK66BD8V4LN,A0402564TCEO67AUZFJO,A04256643L1ZMT75HVD2P,A0590501PZ7HOWJKBGQ4,A0641581307AKT5MAOU0Q,A076219533YHEV2LJO988,A07865442XT0VEAVPSGX1,A0821988FXKFYX53V4QG,A085830328D6R4Z74JTJ,A099626739FNCRNHIKBCG,A100IGCRWUYAZW,A100LLXMXDZHJZ,A10121433FMMZNY18RWZ8,A1013OEQCVS6AR,A1013Q8NLCY56R,A101DJDPGK0Q46,A10285OSAM1MZK,A102IUFPXVDRB6,A102L0RVI4LHH3,A102RLOGIBBDMW,A1038957GWRBP375RU5T,A1058D8UFZL711,A105AZN9C8UAOJ,A105C374T9A12,A105R38CAPDBGU,A105ZJJNE4AFTU,A106YUCY4SVX1D,A1077JEY0CE3BB,A1079KKA0AXSL3,A107I2HYD2JZ5G,A107S4MT25VXQ5,A1088LCAS58FXN,A1089S59XSJT2T,A108EEYSHGDL6O,A108LYH0NZ9919,A108UXJTIV8XXY,A10956KAP9F1CO,...,AZKOO6TVQBDAF,AZKQZNA1FXKY8,AZL95QPWJDM3Y,AZM1KR1X4478H,AZM8PD2KBVXN5,AZMY6E8B52L2T,AZNMO6T8G0ZEU,AZO3DZAJ8S0D9,AZODG8OG0CETL,AZOFJX627J181,AZP1HKS4XINS0,AZPPZOYTNLEN0,AZPT44204EODM,AZPT6K5K0IVCT,AZPXDTWY5M5MQ,AZQG3BOZE5J8S,AZQJQBJ75DHMS,AZQOWHUHXMIMS,AZQQR46TEV0AJ,AZQQVI9RHJYH6,AZQZ3STMCBG5H,AZRPGTU8A7KD5,AZSUNAT0BRQDU,AZSZOO6PQKG6Z,AZT3AXKZYBY4P,AZTC7YC8PKG39,AZTC8ZV20NO1D,AZTYUD0UXIJI0,AZU626MLDSIAG,AZV9WA9MNT0FB,AZX0ZDVAFMN78,AZX5LAN9JEAFF,AZX7I110AF0W2,AZXFQVG0PS3V1,AZYNQZ94U6VDB,AZYTSU42BZ7TP,AZZ77XYX8O2WE,AZZGUU97OHUF1,AZZMV5VT9W7Y8,distances
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
9989467145,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.379998
9966236066,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.543156
9966635335,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.624299
9984984745,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.949017
9985582713,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.047546
