In [27]:
import pandas as pd
import numpy as np

In [28]:
df_users = pd.read_csv("Mldata/users.csv")
df_posts = pd.read_csv("Mldata/posts.csv")
df_views = pd.read_csv("Mldata/views.csv")
df_posts.columns = ['_id' , 'title' , 'category' , 'ptype']
number_posts = len(df_posts)
number_users = len(df_users)

In [29]:
user_dictionary = {}
for i in range(len(df_users['_id'])):
    user_dictionary[df_users['_id'][i]] = i
    
posts_dictionary = {}
for i in range(len(df_posts)):
    posts_dictionary[df_posts['_id'][i]] = i


In [59]:
#loads the data that has been trained on gradient descent in content.ipynb and collaborative.ipynb 
contentbased_theta = np.load('content_theta.npy')
contentbased_features = np.load('content_features.npy')

collaborativebased_theta = np.load('collaborative_theta.npy')
collaborativebased_features = np.load('collaborative_features.npy')

Y = np.zeros((len(df_posts) , len(df_users)))
for i in range(len(df_views)):
    user  = df_views['user_id'][i]
    post = df_views['post_id'][i]
    if post in posts_dictionary:
        Y[posts_dictionary[post]][user_dictionary[user]] =1

In [81]:
def suggest_posts(user_index  , method ):
    #
    if(method == "content_based"):
        theta = contentbased_theta
        X = contentbased_features
    else:
        theta = collaborativebased_theta
        X = collaborativebased_features
    
    
    theta_user = theta[user_index , :]
    theta_user.reshape(theta.shape[1] , 1)
    already_seen = []
    recommended = {}
    for i in range(number_posts):
        X_post = X[i,:]
        X_post.reshape(X.shape[1] , 1)
        
        if(Y[i][user_index] == 1):
            already_seen.append(i)
        else:
            recommended[i] = np.dot(theta_user.T , X_post)
    
    #sorts the "possible rating" for each post in reverse order
    recommended_list = sorted(recommended.items() , reverse = True , key=lambda x: x[1])
    print("Already seen posts: ")
    for i in range(len(already_seen)):
        print("Title: " ,df_posts['title'][i]  ,"\nCategories: "  ,  df_posts['category'][already_seen[i]] , "\n")
    
    
    print("\nRecommended: \n")
    for i in range(len(recommended_list)):
        #prints the top 10
        if(i>=10):
            break
        print("Title: " ,df_posts['title'][recommended_list[i][0]]  ,"\nCategories: "  ,  df_posts['category'][recommended_list[i][0]] , "\n")

            

In [82]:
def suggest_related_posts(post_index , method):

    if(method == "content_based"):
        theta = contentbased_theta
        X = contentbased_features
    else:
        theta = collaborativebased_theta
        X = collaborativebased_features
    
    
    this_vector = X[post_index , :]
    this_vector.reshape(X.shape[1] , 1)
    recommended = {}
    print("Given post: ")
    print("Title: " ,df_posts['title'][post_index]  ,"\nCategories: "  ,  df_posts['category'][post_index] , "\n")

    print("Suggested: ")
    print()
    #calculates the euclidian distance between the feature vectors of given post and every other post
    for i in (range(number_posts)):
        that_vector = X[i , :]
        that_vector.reshape(X.shape[1] , 1)
        dist = np.linalg.norm(this_vector - that_vector)
        recommended[i] = dist
        
    #sorts the distances in reverse order and prints the top 10
    recommended_list = sorted(recommended.items() , key=lambda x: x[1])
    for i in range(len(recommended_list)):
        if(i>=10):
            break
        elif(i==0):
            continue
        print("Title: " ,df_posts['title'][recommended_list[i][0]]  ,"\nCategories: "  ,  df_posts['category'][recommended_list[i][0]] , "\n")


In [83]:
x = int(input("Enter the index of user  ( 0- 187)"))
#can do user id as well, which can be converted to index with the help of user_dictionary
t = int(input("Enter choice: \n1.Content Based\n2.Collaborative Based\n"))
if(t==1):
    suggest_posts(x , "content_based")
else:
    suggest_posts(x , "collaborative_based")

Enter the index of user  ( 0- 187) 32
Enter choice: 
1.Content Based
2.Collaborative Based
 1


Already seen posts: 
Title:  hello there 
Categories:  Auditing|Internal Audit 

Title:  Ml and AI 
Categories:  Auditing|Internal Financial Control 

Title:  What is an Operating System ? 
Categories:  Auditing|Audit Evidence 

Title:  Lord Shiva 
Categories:  Taxation|GST 

Title:  How Competition law evolved? 
Categories:  Taxation|Direct Tax 

Title:  Raghavan Committee - 2002. 
Categories:  Auditing|Secratarial Audit 

Title:  Let's discuss some Case laws! 
Categories:  Banking|Banking Companies 

Title:  Forms of Cartel. 
Categories:  Banking|Banking Technology 

Title:  Biodiversity 
Categories:  Auditing|Audit Remark 

Title:  Economic policy 
Categories:  Auditing|Cost Audit 

Title:  Corporate design 
Categories:  Legal Studies|Legal System 

Title:  Trident 
Categories:  Sports Coaching|Sports Law 

Title:  Lord shiva 
Categories:  Economics|Economics Sociology 

Title:  Smart City 
Categories:  Economics|Revenue Concept 

Title:  Om Namah Shivaay 
Categories:  Human Rights|

In [78]:
x = int(input("Enter the index of the post (0-492)"))
#can do post id as well using the post_dictionary
t =int(input("Enter choice: \n1.Content Based\n2.Collaborative Based\n"))
if(t==1):
    suggest_related_posts(x , "content_based")
else:
    suggest_related_posts(x , "collaborative_based")

Enter the index of the post (0-492) 2
Enter choice: 
1.Content Based
2.Collaborative Based
 1


Given post: 
Title:  What is an Operating System ? 
Categories:  Operating Systems 


Suggested: 

Title:  Operating System 1 
Categories:  Computer Technology|Operating Systems
Title:  Data Transmission Modes 
Categories:  Computer Technology|Computer Application|Operating Systems
Title:  Students Networking! 
Categories:  nan
Title:  hello there 
Categories:  Plant Biotechnology
Title:  How Competition law evolved? 
Categories:  Competition Laws
Title:  Raghavan Committee - 2002. 
Categories:  Competition Laws
Title:  Let's discuss some Case laws! 
Categories:  Competition Laws
Title:  Forms of Cartel. 
Categories:  Competition Laws
Title:  Biodiversity 
Categories:  Eco System
