In [1]:
import pandas as pd
import numpy as np
from scipy import sparse
from lightfm import LightFM

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
interaction = [('u1', 'i1', 1), ('u1', 'i3', 2), ('u2', 'i2', 1), ('u2', 'i3', 3),
               ('u3', 'i1', 4), ('u3', 'i4', 5), ('u3', 'i2', 2)]
# 3user 4item 5种rate
interaction

[('u1', 'i1', 1),
 ('u1', 'i3', 2),
 ('u2', 'i2', 1),
 ('u2', 'i3', 3),
 ('u3', 'i1', 4),
 ('u3', 'i4', 5),
 ('u3', 'i2', 2)]

In [3]:
user_data = [('u1', {'f1': 5, 'f2': 2, 'f3': 1}),
 ('u2', {'f1': 0, 'f2': 1, 'f3': 3}),
 ('u3', {'f1': 4, 'f2': 3, 'f3': 3})]
user_data

[('u1', {'f1': 5, 'f2': 2, 'f3': 1}),
 ('u2', {'f1': 0, 'f2': 1, 'f3': 3}),
 ('u3', {'f1': 4, 'f2': 3, 'f3': 3})]

In [4]:
users = set(map(lambda i:i[0],interaction))
items = set(map(lambda i:i[1],interaction))
user_features = ['f1','f2','f3']
print(users,items,user_features)

{'u3', 'u2', 'u1'} {'i1', 'i2', 'i4', 'i3'} ['f1', 'f2', 'f3']


In [5]:
from lightfm.data import Dataset
# we call fit to supply user id, item id and user/item features
dataset1 = Dataset()
dataset1.fit_partial(users=users,items=items,user_features=user_features)
dataset1.mapping()

({'u3': 0, 'u2': 1, 'u1': 2},
 {'u3': 0, 'u2': 1, 'u1': 2, 'f1': 3, 'f2': 4, 'f3': 5},
 {'i1': 0, 'i2': 1, 'i4': 2, 'i3': 3},
 {'i1': 0, 'i2': 1, 'i4': 2, 'i3': 3})

In [6]:
# 构造用户特征矩阵
uf = dataset1.build_user_features(user_data)
uf.todense()

matrix([[0.09090909, 0.        , 0.        , 0.36363637, 0.27272728,
         0.27272728],
        [0.        , 0.2       , 0.        , 0.        , 0.2       ,
         0.6       ],
        [0.        , 0.        , 0.11111111, 0.5555556 , 0.22222222,
         0.11111111]], dtype=float32)

In [7]:
# 构造交互矩阵
(interactions, weights) = dataset1.build_interactions(interaction)
interactions.todense()

matrix([[1, 1, 1, 0],
        [0, 1, 0, 1],
        [1, 0, 0, 1]])

In [8]:
model = LightFM(loss='logistic')
# 下面的参数ubuntu上可运行，效果更好
# 参考 https://github.com/lyst/lightfm/issues/690
# model = LightFM(loss='warp')

model.fit(interactions,
      user_features= uf, 
      sample_weight= weights,
      epochs=10)

<lightfm.lightfm.LightFM at 0x22257c6ded0>

In [9]:
from lightfm.evaluation import auc_score
# 测试一下好像是
train_auc = auc_score(model,
                      interactions,
                      user_features=uf
                     ).mean()
print('Hybrid training set AUC: %s' % train_auc)


Hybrid training set AUC: 0.5


In [10]:
user_id_map, user_feature_map, item_id_map, item_feature_map = dataset1.mapping()

In [14]:
# 预测现有的用户
# 注意输入的id必须要转换为lightfm内部的id
user_x = user_id_map['u3']
# 即对于该用户，看所有物品和他的匹配度
items_range = np.arange(len(items))
res = model.predict(user_x, items_range)
res = list(zip(items_range,res))
res = sorted(res, key=lambda x: x[1],reverse=True)
res

[(3, 0.5297617), (0, 0.51895887), (2, 0.4935133), (1, 0.46990803)]