In [6]:
import time
import pandas as pd
import numpy as np
import math
from tqdm import tqdm, trange

In [3]:
def timmer(func):
    """Decorator that prints how long each call to ``func`` takes.

    The wrapped function keeps its original ``__name__``/``__doc__`` (via
    ``functools.wraps``) and its return value is passed through unchanged.
    Nothing is printed if ``func`` raises.

    Args:
        func: Any callable.

    Returns:
        A wrapper with the same call signature as ``func``.
    """
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()
        res = func(*args, **kwargs)
        end_time = time.perf_counter()
        print('Func %s, cost %f' % (func.__name__, end_time - start_time))
        return res

    return wrapper

In [4]:
class Dataset():
    """In-memory list of (user, item) interactions with a train/test splitter."""

    def __init__(self, fp):
        """Load the interaction pairs stored in the file at path ``fp``."""
        # List of (user, item) int tuples, in file order.
        self.data = self.loadData(fp)

    def loadData(self, fp):
        """Parse a ``::``-delimited text file into (user, item) tuples.

        Only the first two fields of every line are used; both are converted
        to ``int``. Blank lines are skipped.

        Args:
            fp: Path of the file to read.

        Returns:
            A list of tuples built from the first two integer fields of each
            non-blank line.

        Raises:
            ValueError: If one of the first two fields is not an integer.
        """
        data = []
        # The context manager guarantees the handle is closed even when a
        # malformed line raises.
        with open(fp) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                data.append(tuple(map(int, line.split("::")[:2])))
        return data

    def splitData(self, M, k, seed=1):
        """Randomly split the interactions into train and test sets.

        Every pair draws a random integer in ``[0, M-1]``; pairs whose draw
        equals ``k`` go to the test set and the rest go to the train set.
        The global ``random`` generator is re-seeded with ``seed``, so the
        same ``(M, k, seed)`` always yields the same split.

        Args:
            M: Number of folds.
            k: Index of the fold used as the test set.
            seed: Seed for the random generator.

        Returns:
            A ``(train, test)`` tuple of dicts mapping each user to the set
            of items that user has in that split. Users with no pair in a
            split are absent from that dict.
        """
        import random
        random.seed(seed)

        train, test = [], []
        for user, item in self.data:
            if random.randint(0, M - 1) == k:
                test.append((user, item))
            else:
                train.append((user, item))

        def convert_dict(data):
            """Group (user, item) pairs into a ``{user: {items}}`` dict."""
            data_dict = {}
            for user, item in data:
                # Create the user's set on first sight, then add the item.
                data_dict.setdefault(user, set()).add(item)
            return data_dict

        return convert_dict(train), convert_dict(test)

In [7]:
def LFM(train, ratio, K, lr, step, lmbda, N):
    # Tally how often every item occurs across all users' item collections;
    # the tallies serve as the items' popularity values.
    counts = {}
    for user in train:
        for item in train[user]:
            counts[item] = counts.get(item, 0) + 1
    # Parallel views of the tally: (item, count) pairs, item ids, and counts,
    # all in first-seen order.
    all_items = list(counts.items())
    items = [item for item, _ in all_items]
    pops = [count for _, count in all_items]
    
    # Negative sampling
    def nSample(data, ratio):
        """Build a labelled sample set: positives get 1, sampled negatives get 0.

        For each user, negatives are drawn from ``items`` with probability
        proportional to ``pops`` and filtered to items the user has not
        interacted with.

        Args:
            data: Dict mapping each user to a collection of interacted items.
            ratio: Target number of negatives per positive.

        Returns:
            Dict mapping each user to ``{item: label}``. A user may end up
            with fewer than ``len(positives) * ratio`` negatives, because
            draws are made with replacement and duplicates collapse.
        """
        new_data = {user: {item: 1 for item in data[user]} for user in data}
        if not items:
            return new_data

        # np.random.choice takes the weights through the keyword ``p`` and
        # requires them to sum to 1; passed positionally they would land in
        # ``replace`` and be ignored.
        probs = np.asarray(pops, dtype=float)
        probs /= probs.sum()

        for user in data:
            seen_items = set(data[user])
            pos_num = len(data[user])
            # Oversample 3x so enough candidates remain after dropping items
            # the user has already seen.
            candidates = np.random.choice(items, int(pos_num * ratio * 3), p=probs)
            negatives = [x for x in candidates if x not in seen_items]
            negatives = negatives[:int(pos_num * ratio)]
            new_data[user].update({x: 0 for x in negatives})

        return new_data
        
    # Training: P holds one length-K factor vector per user, Q one per item,
    # both initialised with uniform random values in [0, 1).
    P, Q = {}, {}
    for user in train:
        P[user] = np.random.random(K)
    for item in items:
        Q[item] = np.random.random(K)

    for _ in range(0, step):
        # Draw a fresh set of negatives for every pass.
        data = nSample(train, ratio)
        for user in data:
            for item in data[user]:
                # Error between the label (1 or 0) and the current prediction.
                eui = data[user][item] - (P[user] * Q[item]).sum()
                P[user] += lr*(Q[item]*eui - lmbda*P[user])
                # P[user] was updated in place above, so this step uses the
                # already-updated user vector.
                Q[item] += lr*(P[user]*eui - lmbda*Q[item])
        # Decay the learning rate once per pass. Decaying once per user would
        # shrink it to almost zero before the first pass finished.
        lr *= 0.9
    
    def GetRecommendation(user):
        """Return up to N unseen items for ``user``, best score first.

        The score of an item is the dot product of the user's factor vector
        ``P[user]`` and the item's factor vector ``Q[item]``. Items present
        in ``train[user]`` are excluded.

        Returns:
            A list of ``(item, score)`` tuples sorted by descending score.
        """
        already_seen = set(train[user])
        scores = {
            candidate: (P[user]*Q[candidate]).sum()
            for candidate in items
            if candidate not in already_seen
        }
        ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
        return ranked[:N]