# UserCF

## 1.基础算法

In [None]:
import pandas as pd
import math
from sklearn.model_selection import train_test_split
from collections import defaultdict
from data.movielens.data_reader import data_reader
from algorithm.usercf import UserCF
from util.metric import recall, precision, coverage, popularity

In [None]:
# Load the train/test data through the project's MovieLens data_reader.
# Both names are used by the cells below: the first feeds UserCF, the second
# supplies the users to recommend for and the data the metrics compare against.
train_user_items, test_user_items  = data_reader()

In [None]:
# Instantiate the baseline UserCF model on the training data.
user_cf = UserCF(train_user_items)

In [None]:
# Run the model's similarity step before requesting recommendations.
# NOTE(review): presumably this computes and stores user-user similarities on
# the model — confirm in algorithm.usercf.
user_cf.user_similarity()

In [None]:
# Evaluation settings shared by the cells below.
recomments = list()  # will hold one recommendation result per entry of K
K = [5, 10, 20, 40, 80, 160]  # values passed as the second argument of recomment_users
N = 30  # number of items to recommend to each user

In [None]:
# Build the recommendation results from scratch, one entry per value in K and
# in the same order as K. Rebinding the list here (instead of appending to the
# list created in an earlier cell) keeps this cell safe to re-run on its own:
# appending would stack duplicate/stale results on every re-run.
recomments = [
    user_cf.recomment_users(set(test_user_items.keys()), k, N)
    for k in K
]

In [None]:
def evaluate(k):
    """Score one entry of the global ``recomments`` list.

    Args:
        k: Position in ``recomments`` of the result to score (an index into
            the list, not a value taken from ``K``).

    Returns:
        A 4-tuple ``(recall, precision, coverage, popularity)``; every metric
        is computed against the global ``test_user_items`` and rounded to
        four decimal places.
    """
    recs = recomments[k]
    # Metrics are applied in the same order as the returned tuple.
    metric_fns = (recall, precision, coverage, popularity)
    return tuple(round(fn(recs, test_user_items), 4) for fn in metric_fns)

In [None]:
# Score every stored result; evaluate() addresses `recomments` by position,
# so position idx corresponds to K[idx].
evals = [evaluate(idx) for idx in range(len(K))]

In [None]:
# Results table: one row per value in K, one column per metric from evaluate().
pd.DataFrame(
    evals,
    index=K,
    columns=['recall', 'precision', 'coverage', 'popularity'],
)

## 2.使用改进后的用户相似度计算公式

In [None]:
from algorithm.usercf1 import UserCF

# Repeat the UserCF experiment with the class from algorithm.usercf1.
# Note that this import rebinds the name UserCF for the rest of the session.
user_cf = UserCF(train_user_items)
user_cf.user_similarity()

N = 30  # number of items to recommend to each user
K = [5, 10, 20, 40, 80, 160]

# One recommendation result per value in K, in the same order as K.
recomments = [
    user_cf.recomment_users(set(test_user_items.keys()), k, N) for k in K
]

# evaluate() addresses `recomments` by position, so index i matches K[i].
evals = [evaluate(i) for i in range(len(K))]

# Results table: one row per value in K, one column per metric.
pd.DataFrame(
    evals,
    index=K,
    columns=['recall', 'precision', 'coverage', 'popularity'],
)

# ItemCF

## 1.基础算法

In [None]:
from algorithm.itemcf import ItemCF

# Reload the data with is_user_items=False for the item-based model.
# NOTE(review): judging by the variable name this returns the training data
# keyed by item — confirm in data_reader. If data_reader splits randomly, this
# split may differ from the one used for UserCF above — TODO confirm.
train_item_users, test_user_items = data_reader(is_user_items=False)

item_cf = ItemCF(train_item_users)
item_cf.item_similarity()

N = 30  # number of items to recommend to each user
K = [5, 10, 20, 40, 80, 160]

# One recommendation result per value in K, in the same order as K.
recomments = [
    item_cf.recomment_users(set(test_user_items.keys()), k, N) for k in K
]

# Start with an empty score list for this experiment.
evals = []

In [None]:
# Build the scores from scratch inside this cell. Appending to the `evals`
# list created in another cell makes a re-run of this cell add len(K) extra
# rows, after which the DataFrame below no longer matches the length of its
# index K.
# evaluate() addresses `recomments` by position, so index i matches K[i].
evals = [evaluate(i) for i in range(len(K))]

# Results table: one row per value in K, one column per metric.
pd.DataFrame(data=evals,
             columns=['recall', 'precision', 'coverage', 'popularity'],
             index=K)

## 2.IUF相似度

In [None]:
from algorithm.itemcf1 import ItemCF

# Repeat the ItemCF experiment with the class from algorithm.itemcf1.
# Note that this import rebinds the name ItemCF for the rest of the session.
# NOTE(review): the data is reloaded here; if data_reader splits randomly the
# split may differ from the previous experiment — TODO confirm.
train_item_users, test_user_items = data_reader(is_user_items=False)

item_cf = ItemCF(train_item_users)
item_cf.item_similarity()

N = 30  # number of items to recommend to each user
K = [5, 10, 20, 40, 80, 160]

# One recommendation result per value in K, in the same order as K.
recomments = [
    item_cf.recomment_users(set(test_user_items.keys()), k, N) for k in K
]

# evaluate() addresses `recomments` by position, so index i matches K[i].
evals = [evaluate(i) for i in range(len(K))]

# Results table: one row per value in K, one column per metric.
pd.DataFrame(
    evals,
    index=K,
    columns=['recall', 'precision', 'coverage', 'popularity'],
)

## 3.对每个物品和其它物品的相似度归一化

In [None]:
from algorithm.itemcfnorm import ItemCF

# Repeat the ItemCF experiment with the class from algorithm.itemcfnorm.
# Note that this import rebinds the name ItemCF for the rest of the session.
# NOTE(review): the data is reloaded here; if data_reader splits randomly the
# split may differ from the previous experiment — TODO confirm.
train_item_users, test_user_items = data_reader(is_user_items=False)

item_cf = ItemCF(train_item_users)
item_cf.item_similarity()

N = 30  # number of items to recommend to each user
K = [5, 10, 20, 40, 80, 160]

# One recommendation result per value in K, in the same order as K.
recomments = [
    item_cf.recomment_users(set(test_user_items.keys()), k, N) for k in K
]

# evaluate() addresses `recomments` by position, so index i matches K[i].
evals = [evaluate(i) for i in range(len(K))]

# Results table: one row per value in K, one column per metric.
pd.DataFrame(
    evals,
    index=K,
    columns=['recall', 'precision', 'coverage', 'popularity'],
)

# 隐语义模型

In [None]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from collections import defaultdict
from algorithm.lfm import LFM
from util.metric import recall, precision, coverage, popularity

In [None]:
# Configure the latent factor model, then build its matrix and train it for
# 30 epochs, printing a progress line before each stage.
lfm = LFM(
    k=10,
    regularization_rate=0.01,
    learning_rate=0.02,
)

print("build matrix: ")
lfm.build_matrix()

print("start train:")
lfm.train(epoches=30)

In [None]:
# Keep the single LFM result (30 items per user) in a one-element list so it
# can be addressed by position like the other experiments' results.
recomments = [lfm.recomment_users(N=30)]

In [None]:
def evaluate(k, test=None):
    """Score one entry of the global ``recomments`` list against test data.

    This definition replaces the earlier one-argument ``evaluate(k)``. Making
    ``test`` optional keeps the old call form ``evaluate(i)`` working when an
    earlier cell is re-run after this one, instead of raising ``TypeError``.

    Args:
        k: Position in ``recomments`` of the result to score (an index into
            the list).
        test: Test data passed as the second argument to every metric. When
            omitted, the global ``test_user_items`` is used.

    Returns:
        A 4-tuple ``(recall, precision, coverage, popularity)``, each value
        rounded to four decimal places.

    Raises:
        NameError: If ``test`` is omitted and ``test_user_items`` has not
            been defined in the session.
    """
    if test is None:
        # Fall back to the data the one-argument version used.
        test = test_user_items

    recs = recomments[k]
    # Metrics are applied in the same order as the returned tuple.
    metric_fns = (recall, precision, coverage, popularity)
    return tuple(round(fn(recs, test), 4) for fn in metric_fns)

In [None]:
# Score the single LFM result against the model's own test data and show it
# as a one-row table.
evals = [evaluate(0, lfm.test)]
pd.DataFrame(
    evals,
    index=[0],
    columns=['recall', 'precision', 'coverage', 'popularity'],
)