# In [1]:
import random
import math
import time
from tqdm import tqdm 

# In [10]:
def timmer(func):
    """Decorate *func* so that each call prints its elapsed run time.

    The wrapper forwards all positional and keyword arguments to *func*,
    prints ``Func <name>, run time: <seconds>`` and returns the result of
    *func* unchanged.
    """
    # Local import keeps this block self-contained.
    import functools

    # functools.wraps preserves __name__/__doc__ of the decorated function,
    # so introspection and stacked decorators see the real function.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic and high resolution, unlike the wall
        # clock, which may jump if the system time is adjusted.
        start_time = time.perf_counter()
        res = func(*args, **kwargs)
        end_time = time.perf_counter()
        print("Func %s, run time: %f"%(func.__name__, end_time-start_time))
        return res
    return wrapper

# In [12]:
class Dataset():
    """Load (user, item) interaction pairs from a file and split them.

    Each non-empty line of the file is split on ``"::"`` and its first two
    fields are parsed as integers, giving a ``(user, item)`` tuple.
    """

    def __init__(self, fp):
        """Read the interaction file at path *fp* into ``self.data``."""
        self.data = self.loadData(fp)

    @timmer
    def loadData(self, fp):
        """Return the list of ``(user, item)`` integer tuples read from *fp*.

        Blank lines are skipped. A malformed line still raises ``ValueError``
        from ``int``.
        """
        data = []
        # The context manager guarantees the file handle is closed.
        with open(fp) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                data.append(tuple(map(int, line.split("::")[:2])))
        return data

    @timmer
    def splitData(self, M, k, seed=1):
        """Randomly split ``self.data`` into train and test sets.

        For every record an integer is drawn uniformly from ``[0, M-1]``;
        records whose draw equals *k* go to the test set, all others to the
        training set. The random generator is seeded with *seed*, so the
        split is reproducible.

        Returns:
            A ``(train, test)`` tuple of dicts mapping each user to the list
            of that user's distinct items.
        """
        train, test = [], []
        random.seed(seed)
        for user, item in self.data:
            if random.randint(0, M-1) == k:
                test.append((user, item))
            else:
                train.append((user, item))

        # Convert a list of (user, item) pairs into {user: [items]}.
        def convert_dict(data):
            data_dict = {}
            for user, item in data:
                # A set removes duplicate (user, item) interactions.
                data_dict.setdefault(user, set()).add(item)
            # Key by the user id; the collected item set becomes the value.
            return {user: list(items) for user, items in data_dict.items()}

        return convert_dict(train), convert_dict(test)

# In [14]:
# Eval Index: Precision, Recall, Coverage and Popularity
class Metric():
    """Evaluate a recommender with precision and recall.

    Args:
        train: dict mapping user -> list of items (stored, not used by the
            metrics visible here).
        test: dict mapping user -> list of held-out items.
        GetRecommendation: callable taking a user and returning that user's
            recommendations, either as ``(item, score)`` tuples or as bare
            item ids.
    """

    def __init__(self, train, test, GetRecommendation):
        self.train = train
        self.test = test
        self.GetRecommendation = GetRecommendation
        self.recs = self.getRecs()

    def getRecs(self):
        """Return ``{user: recommendations}`` for every user in the test set."""
        # Iterate the instance's own test set, not a global named ``test``.
        return {user: self.GetRecommendation(user) for user in self.test}

    @staticmethod
    def _item_ids(recs):
        """Return the set of item ids in *recs*.

        Accepts both ``(item, score)`` tuples and bare item ids, so hits are
        detected regardless of which form the recommender returns.
        """
        return {rec[0] if isinstance(rec, tuple) else rec for rec in recs}

    def precision(self):
        """Return the percentage of recommendations that are test items.

        Rounded to two decimals; ``0.0`` when nothing was recommended.
        """
        total, hit = 0, 0
        for user in self.test:
            real_items = set(self.test[user])
            pred_items = self.recs[user]
            hit += len(real_items & self._item_ids(pred_items))
            total += len(pred_items)
        if not total:
            return 0.0
        return round(hit / total * 100, 2)

    def Recall(self):
        """Return the percentage of test items that were recommended.

        Rounded to two decimals; ``0.0`` when the test set has no items.
        """
        total, hit = 0, 0
        for user in self.test:
            real_items = set(self.test[user])
            # Only recommendations that are real test items count as hits.
            hit += len(real_items & self._item_ids(self.recs[user]))
            total += len(real_items)
        if not total:
            return 0.0
        return round(hit / total * 100, 2)

# In [1]:
def ItemCF(train, K, N=10):
    """Build an item-based collaborative-filtering recommender.

    Item similarity is the co-occurrence count of two items across users,
    divided by ``sqrt(count(i1) * count(i2))``, where ``count`` is the number
    of user lists an item appears in.

    Args:
        train: dict mapping user -> list of items the user interacted with.
        K: number of most-similar neighbours considered per seen item.
        N: maximum number of recommendations returned per user (default 10).

    Returns:
        A function ``GetRecommendation(user)`` returning up to *N*
        ``(item, score)`` tuples sorted by descending score. Items the user
        already has in *train* are never recommended; a user absent from
        *train* gets an empty list.
    """
    sim = {}
    num = {}
    for items in train.values():
        for i, i1 in enumerate(items):
            neighbours = sim.setdefault(i1, {})
            num[i1] = num.get(i1, 0) + 1
            for j, i2 in enumerate(items):
                if i == j:
                    continue
                neighbours[i2] = neighbours.get(i2, 0) + 1

    # Normalise the raw co-occurrence counts by item popularity.
    for i1, neighbours in sim.items():
        for i2 in neighbours:
            neighbours[i2] /= math.sqrt(num[i1] * num[i2])

    # Neighbours of each item, ordered from most to least similar.
    sorted_item = {
        i1: sorted(neighbours.items(), key=lambda x: x[1], reverse=True)
        for i1, neighbours in sim.items()
    }

    # Recommendation interface returned to the caller.
    def GetRecommendation(user):
        scores = {}
        user_items = train.get(user, [])
        seen_items = set(user_items)
        for item in user_items:
            # Only the K most similar neighbours of each seen item vote.
            for other, weight in sorted_item[item][:K]:
                if other in seen_items:
                    continue
                # Accumulate rather than reset, so an item similar to several
                # seen items gets a higher score.
                scores[other] = scores.get(other, 0) + weight

        recs = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:N]
        return recs

    return GetRecommendation