In [1]:
import pandas as pd

# Read the raw CSV files and trim stray whitespace from the header names
df_f = pd.read_csv("follows.csv").rename(columns=str.strip)
df_i = pd.read_csv("interests.csv").rename(columns=str.strip)

# One-hot encode the categories, then collapse to one row per user_id
df_i = pd.get_dummies(df_i, columns=['category']).groupby('user_id').sum()

# Same for the follow edges: one row per follower, one dummy column per followee.
# The follower index is renamed to user_id so it lines up with df_i.
df_f = (
    pd.get_dummies(df_f, columns=['followee_id'])
    .groupby('follower_id')
    .sum()
    .rename_axis('user_id')
)

In [2]:
#rename df_f columns: drop the 'followee_id_' prefix that get_dummies prepended
# str.strip('followee_id_') treats its argument as a *set of characters* and trims
# them from both ends, so it can also eat leading/trailing characters of the id
# itself (or of any other column name). Remove the exact prefix instead and leave
# columns that do not carry it untouched.
followee_prefix = 'followee_id_'
df_f_cols = [
    col[len(followee_prefix):] if col.startswith(followee_prefix) else col
    for col in df_f.columns.tolist()
]
df_f.columns = df_f_cols

In [3]:
# Inner join so that only user_ids present in both frames are kept;
# the result carries df_i's columns first, followed by df_f's.
df_test1 = df_i.join(df_f, on='user_id', how='inner')
df_test1.shape

(2237, 4478)

In [4]:
#split the joined frame back into its interest and followee column groups
# df_test1 was built as df_i.join(df_f), so df_i's columns come first. Derive the
# boundary from df_i instead of hard-coding 883, and slice columns directly
# rather than transposing the whole frame twice.
n_interest_cols = df_i.shape[1]
df_reduced_i = df_test1.iloc[:, :n_interest_cols]
df_reduced_f = df_test1.iloc[:, n_interest_cols:]

In [5]:
# Flip the followee frame so that each row is a followee
df_f_t = df_reduced_f.T
df_f_t.index.name = 'followee_id'
# The followee labels come from column names; cast them to int64
# (presumably so they are comparable with the user_id index -- confirm)
df_f_t.index = df_f_t.index.astype('int64')

# Give the interest frame the same index name ahead of the index-on-index merge
df_reduced_i.index.name = 'followee_id'

In [6]:
# Keep only the ids that appear both as a user (with interests) and as a followee:
# index-on-index merge with the default inner join
df_test2 = df_reduced_i.merge(df_f_t, left_index=True, right_index=True)
df_test2.index.name = 'user_id'

In [7]:
#split the merged frame back into the interest and followee matrices
# The merge put df_reduced_i's columns first, so its width is the boundary;
# using it avoids the hard-coded 883 and two full transposes of the frame.
n_interest_cols = df_reduced_i.shape[1]
df_interest = df_test2.iloc[:, :n_interest_cols]
df_followee = df_test2.iloc[:, n_interest_cols:]

In [8]:
import sklearn
from sklearn.neighbors import NearestNeighbors

#create prediction matrices for recommended followees (one per distance metric)
# Start from all-zero integer matrices with the same labels as df_followee.
# Building the frame from labels only would give object-dtype NaN cells that
# have to be filled and converted before they can be scored.
df_recom_c = pd.DataFrame(0, index=df_followee.index.tolist(), columns=df_followee.columns.tolist())
df_recom_j = pd.DataFrame(0, index=df_followee.index.tolist(), columns=df_followee.columns.tolist())

In [10]:
#KNN model - cosine metric
# The training matrix is the same for every user, so build and fit the model
# once instead of refitting it on every loop iteration.
model_knn_c = NearestNeighbors(metric = 'cosine', algorithm = 'auto')
model_knn_c.fit(df_interest.values)

for n in df_interest.index.values:

    # Ask for as many neighbours as the row has entries in df_followee,
    # capped at the number of fitted samples (kneighbors rejects more than that).
    nrecommendation = min(int(df_followee.loc[n].sum()), len(df_interest))
    if nrecommendation < 1:
        # kneighbors requires n_neighbors >= 1; nothing to recommend for this row
        continue

    #generate the recommendation followees for each id
    # NOTE(review): the query row is itself part of the fitted data, so it is
    # normally returned as its own nearest neighbour -- confirm this is intended.
    distances_c, indices_c = model_knn_c.kneighbors(df_interest.loc[n, :].values.reshape(1, -1), n_neighbors = nrecommendation)

    # .at is a scalar accessor; use .loc to set several columns of one row
    df_recom_c.loc[n, df_interest.index[indices_c.flatten()].values] = 1

In [11]:
#KNN model - jaccard metric
# The training matrix is the same for every user, so build and fit the model
# once instead of refitting it on every loop iteration.
model_knn_j = NearestNeighbors(metric = 'jaccard', algorithm = 'auto')
model_knn_j.fit(df_interest.values)

for n in df_interest.index.values:

    # Ask for as many neighbours as the row has entries in df_followee,
    # capped at the number of fitted samples (kneighbors rejects more than that).
    nrecommendation = min(int(df_followee.loc[n].sum()), len(df_interest))
    if nrecommendation < 1:
        # kneighbors requires n_neighbors >= 1; nothing to recommend for this row
        continue

    #generate the recommendation followees for each id
    # NOTE(review): the query row is itself part of the fitted data, so it is
    # normally returned as its own nearest neighbour -- confirm this is intended.
    distances_j, indices_j = model_knn_j.kneighbors(df_interest.loc[n, :].values.reshape(1, -1), n_neighbors = nrecommendation)

    # .at is a scalar accessor; use .loc to set several columns of one row
    df_recom_j.loc[n, df_interest.index[indices_j.flatten()].values] = 1

In [12]:
from sklearn.metrics import precision_score
from statistics import mean

#precision score function
def prec_score(df_true, df_prediction):
    """Return the mean of the row-wise precision scores.

    For every row label in ``df_prediction.index`` the matching rows of
    ``df_true`` and ``df_prediction`` are passed to ``precision_score``;
    the per-row results are then averaged with ``statistics.mean``.

    Parameters
    ----------
    df_true : DataFrame holding the ground truth, looked up by row label.
    df_prediction : DataFrame holding the predictions; its index drives the loop.

    Returns
    -------
    The arithmetic mean of the per-row precision values.
    """
    row_scores = []
    for row_label in df_prediction.index.values:
        truth_row = df_true.loc[row_label]
        predicted_row = df_prediction.loc[row_label]
        row_scores.append(precision_score(truth_row, predicted_row))
    return mean(row_scores)

In [13]:
from sklearn.metrics import f1_score

#f1 score function
def fone_score(df_true, df_prediction):
    """Return the mean of the row-wise F1 scores.

    For every row label in ``df_prediction.index`` the matching rows of
    ``df_true`` and ``df_prediction`` are passed to ``f1_score``; the
    per-row results are then averaged with ``statistics.mean``.

    Parameters
    ----------
    df_true : DataFrame holding the ground truth, looked up by row label.
    df_prediction : DataFrame holding the predictions; its index drives the loop.

    Returns
    -------
    The arithmetic mean of the per-row F1 values.
    """
    row_scores = []
    for row_label in df_prediction.index.values:
        truth_row = df_true.loc[row_label]
        predicted_row = df_prediction.loc[row_label]
        row_scores.append(f1_score(truth_row, predicted_row))
    return mean(row_scores)

In [14]:
#prediction / recommended followees for the cosine metric
# Cells never marked as recommended count as 0. Cast explicitly to integers so
# the scorers receive numeric 0/1 labels instead of relying on fillna to
# downcast object-dtype columns on its own.
df_pred_c = df_recom_c.fillna(0).astype('int64')
df_true_c = df_followee

#prediction / recommended followees for the jaccard metric
df_pred_j = df_recom_j.fillna(0).astype('int64')
df_true_j = df_followee

In [15]:
# Mean row-wise precision and F1 of the cosine-based recommendations.
# NOTE(review): the two numbers come out identical -- presumably because each
# row gets exactly as many recommendations as it has true followees, which makes
# precision equal recall (and therefore F1); confirm.
cosine_precision = prec_score(df_true_c, df_pred_c)
print("precision_score for cosine matric is: ", cosine_precision)
cosine_f1 = fone_score(df_true_c, df_pred_c)
print("f1_score is: ", cosine_f1)

precision_score for cosine matric is:  0.13626048387793355
f1_score is:  0.13626048387793355


In [16]:
# Mean row-wise precision and F1 of the jaccard-based recommendations.
# NOTE(review): the two numbers come out identical -- presumably because each
# row gets exactly as many recommendations as it has true followees, which makes
# precision equal recall (and therefore F1); confirm.
jaccard_precision = prec_score(df_true_j, df_pred_j)
print("precision_score for jaccard matric is: ", jaccard_precision)
jaccard_f1 = fone_score(df_true_j, df_pred_j)
print("f1_score is: ", jaccard_f1)

precision_score for jaccard matric is:  0.20328113039558496
f1_score is:  0.20328113039558496
