In [352]:
import pandas as pd
import numpy as np

df_users = pd.read_csv("Mldata/users.csv")
df_posts = pd.read_csv("Mldata/posts.csv")
df_views = pd.read_csv("Mldata/views.csv")
df_posts.columns = ['_id' , 'title' , 'category' , 'ptype']
user_dictionary = {}
for i in range(len(df_users['_id'])):
    user_dictionary[df_users['_id'][i]] = i
    
posts_dictionary = {}
for i in range(len(df_posts)):
    posts_dictionary[df_posts['_id'][i]] = i
item_features = set()
for i in range(len(df_posts)):
    if(pd.notnull(df_posts['category'][i])):
        diff= df_posts['category'][i].split('|')
        #print(diff)
        for spec in diff:
            item_features.add(spec)

item_features.add('blog')
item_features.add('artwork')
item_features.add('project')
item_features.add('skill')

features = np.array(list(item_features))
features_dictionary = {}
for i in range(len(features)):
    features_dictionary[features[i]] = i


In [353]:
X = np.random.randn(len(df_posts) , len(features)) * 0.01
theta = np.random.randn(len(df_users) , len(features))*0.01
number_users = Y.shape[1]
number_posts = Y.shape[0]

In [354]:
Y = np.zeros((len(df_posts) , len(df_users)))
for i in range(len(df_views)):
    user  = df_views['user_id'][i]
    post = df_views['post_id'][i]
    if post in posts_dictionary:
        Y[posts_dictionary[post]][user_dictionary[user]] =1
print(np.sum(theta[9]))

0.13604327365321944


In [355]:
def compute_cost(X , Y , theta , lamda):
    J =0
    for j in range(number_users):
        for i in range(number_posts):
            if(Y[i][j]):
                x = X[i , :]
                x.resize(X.shape[1] , 1)
                the = theta[j , :]
                the.resize(theta.shape[1] , 1)
                J += ((np.dot(the.T , x) - Y[i][j]) ** 2)/2
    for j in range(number_users):
        J += lamda/2 * np.sum(theta[j,:])
    
    for i in range(number_posts):
        J += lamda/2 * np.sum(X[i,:])
    
    return J

def gradient_descen(alpha , num_iterations , X , Y , theta , lamda):
    m = Y.shape[0]
    number_users = Y.shape[1]
    number_posts = Y.shape[0]

    for k in range(num_iterations):
        tj = np.zeros((theta.shape[0] , theta.shape[1]))
        xi = np.zeros((X.shape[0] , X.shape[1]))
        for j in range(number_users):
            t = np.zeros((theta.shape[1]  , 1))
            for i in range(number_posts):
                if(Y[i][j] == 1):
                    x = X[i , :]
                    x.resize(X.shape[1] , 1)
                    the = theta[j , :]
                    the.resize(theta.shape[1] , 1)
                    t += (np.dot(the.T , x) - Y[i][j]) * x
            t = t[: , 0]
            tj[j,:] = theta[j,:]
            tj[j,:] = tj[j,:] -  alpha*(t + lamda*tj[j])
        for i in range(number_posts):
            xc = np.zeros((X.shape[1] , 1))
            for j in range(number_users):
                if(Y[i][j] == 1):
                    x = X[i , :]
                    x.resize(X.shape[1] , 1)
                    the = theta[j , :]
                    the.resize(theta.shape[1] , 1)
                    xc += (np.dot(the.T , x) - Y[i][j])*the
            xc= xc[:,0] 
            xi[i , :] = X[i , :]
            xi[i, :] = xi[i, :] - alpha*(xc + lamda*xi[i])
        theta = tj
        X = xi    
        if k%100 == 0:
            print(k , compute_cost(X , Y , theta , lamda))
    
    return [X , theta]

In [357]:
X , theta = gradient_descen(0.01 , 2000 , X , Y , theta , 0.5)

0 [[23.10483448]]
100 [[23.11885589]]
200 [[23.13466645]]
300 [[23.15172309]]


KeyboardInterrupt: 

In [435]:

def suggest_posts(user_index):
    #user_index = user_dictionary[user]
    theta_user = theta[user_index , :]
    theta_user.reshape(theta.shape[1] , 1)
    already_seen = []
    recommended = {}
    for i in range(number_posts):
        X_post = X[i,:]
        X_post.reshape(X.shape[1] , 1)
        
        if(Y[i][user_index] ==1):
            already_seen.append(i)
        else:
            recommended[i] = np.dot(theta_user.T , X_post)
    
    recommended_list = sorted(recommended.items() ,reverse = True ,  key=lambda x: x[1])
    print("Already seen posts: ")
    for i in range(len(already_seen)):
        print(df_posts['category'][already_seen[i]])
    
    print("")
    print("")
    print("Recommended: ")
    for i in range(len(recommended_list)):
        if(i>=20):
            break
        print(df_posts['category'][recommended_list[i][0]])


In [438]:
suggest_posts(28)

Already seen posts: 
Photography
Photography
Photography
Photography


Recommended: 
Marketing|Principles Of Marketing|Marketing Research Methadology|Marketing Management|International Marketing
Marketing|Principles Of Marketing|International Marketing|Promotion And Distribution Decisions
Photography
Psycholgy|Psychological Growth
Computer Technology|Cloud Computing
nan
Geography|Indian Geography
Computer Technology|Cloud Computing
Business|Business Strategies|Business Enviorment|New Venture Planning|Foreign Business|Business Organisation
Drawings
Photography
Computer Technology|Machine Learning
Sculptures
Computer Technology|Machine Learning
Biotechnology|Molecular Biology
Craft work
Graphics|Articulation|Computer Creation
Drawings|Painting|Visual Arts|Artistic design|Watercolours|Acrylics
Computer Technology|Machine Learning
Economics|Revenue Concept


In [377]:
def suggest_related_posts(post_index):
    #post_index = post_dictionary[post_index]
    this_vector = X[post_index , :]
    this_vector.reshape(X.shape[1] , 1)
    recommended = {}
    print(df_posts['category'][post_index])
    print()
    for i in (range(number_posts)):
        that_vector = X[i , :]
        that_vector.reshape(X.shape[1] , 1)
        dist = np.linalg.norm(this_vector - that_vector)
        recommended[i] = dist
    recommended_list = sorted(recommended.items() , key=lambda x: x[1])
    for i in range(len(recommended_list)):
        if(i>=10):
            break
        print(df_posts['category'][recommended_list[i][0]])

In [443]:
suggest_related_posts(12)

Drawings

Drawings
nan
Drawings
nan
Computer Technology|Computer Application
Drawings
Painting
E Commerce|Other Online Platforms
Drawings
nan


In [445]:
np.save("collaborative_theta" , theta)
np.save("collaborative_features" , X)