# 취향 분석

In [1]:
import pandas as pd
import numpy as np
from lightfm.data import Dataset
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k
from lightfm import LightFM

from hyperopt import fmin, hp, tpe, Trials
from scipy.io import mmwrite



In [2]:
pd.set_option('display.max_columns', None)

In [23]:
style_cloth_column = ['newClothId', 'fit', 'feeling', 'stretch', 'visibility', 'thickness']
style_user_column = ['userId', 'size', 'bright', 'color', 'thickness']

# Top

In [107]:
transaction = pd.read_json('./transaction_top.json')
cloth_meta = pd.read_json('./final_top.json')
user_meta = pd.read_json('./user_top.json')
transaction = transaction[['userId', 'newClothId', 'shopCnt']]

In [108]:
transaction = transaction[transaction.shopCnt > 1]
transaction.shape

(286639, 3)

In [109]:
unique_id_cloth_meta = list(set(cloth_meta.newClothId))
transaction=transaction.loc[transaction.newClothId.isin(unique_id_cloth_meta),:]

In [110]:
a = transaction[['newClothId']]
a.insert(1, 'idx', 0, True)
a = a.drop_duplicates(['newClothId'])
a.idx = range(len(a))

In [111]:
transaction = pd.merge(transaction, a, how="left", on="newClothId")

In [112]:
user_meta = user_meta[style_user_column]

In [113]:
unique_id_user_meta = list(set(transaction.userId))
user_meta=user_meta.loc[user_meta.userId.isin(unique_id_user_meta),:]
user_meta = user_meta.reset_index(drop=True)
user_meta

Unnamed: 0,userId,size,bright,color,thickness
0,858064,1.250000,1.000000,1.0,2.750000
1,858065,1.600000,1.800000,2.0,2.200000
2,858067,1.750000,2.000000,2.0,1.500000
3,858070,1.000000,2.000000,1.0,3.000000
4,858071,2.000000,1.800000,2.0,2.000000
...,...,...,...,...,...
131310,1416935,1.866667,2.000000,1.6,2.133333
131311,1416936,1.333333,1.666667,2.0,1.333333
131312,1416938,1.600000,2.200000,2.2,2.200000
131313,1416946,2.000000,2.000000,2.0,2.166667


In [114]:
new_user = pd.DataFrame(user_meta.loc[0]).transpose()
new_user_idx = new_user.index
new_user.loc[new_user_idx, ('userId')] = 8299
new_user.loc[new_user_idx, ('size')] = 0.3
new_user.loc[new_user_idx, ('bright')] = 0.3
new_user.loc[new_user_idx, ('color')] = 0.4
new_user.loc[new_user_idx, ('thickness')] = 0
new_user = new_user[['userId',  'size', 'bright', 'color', 'thickness']]
new_user

Unnamed: 0,userId,size,bright,color,thickness
0,8299.0,0.3,0.3,0.4,0.0


In [115]:
from copy import deepcopy
sub_user = deepcopy(user_meta)

In [116]:
sub_user.size = sub_user.apply(lambda x: abs(x.size-new_user['size'].iloc[0]), axis=1)
sub_user.bright = sub_user.apply(lambda x: abs(x.bright-new_user['bright'].iloc[0]), axis=1)
sub_user.color = sub_user.apply(lambda x: abs(x.color-new_user['color'].iloc[0]), axis=1)
sub_user.thickness = sub_user.apply(lambda x: abs(x.thickness-new_user['thickness'].iloc[0]), axis=1)

In [117]:
sub_col = list(sub_user.columns)[1:]

In [118]:
sub_user.insert(1, 'favorSum', 0, True)
sub_user.favorSum = sum(sub_user[col_i] for col_i in sub_col)
sub_user = sub_user.sort_values('favorSum')

In [119]:
real_user = []
for i in range(3):
    real_user.append(sub_user.iloc[i].userId)
real_user = [int(i) for i in real_user]
print(real_user)

[1070345, 882478, 1115384]


In [120]:
cloth_meta = cloth_meta[style_cloth_column]

In [121]:
cloth_meta

Unnamed: 0,newClothId,fit,feeling,stretch,visibility,thickness
0,1929966,0,0,0,0,0
1,1927658,0,0,0,0,0
2,1927660131403,0,0,0,0,0
3,1928209,0,0,0,0,0
4,1927270,0,0,0,0,0
...,...,...,...,...,...,...
249601,1413008,0,0,0,0,0
249602,779965,3,3,3,3,1
249603,1353489,0,0,0,0,0
249604,1607233,0,0,0,0,0


In [122]:
cloth_meta = pd.merge(cloth_meta, a, how="left", on="newClothId")

In [123]:
unique_cloth_id = list(transaction['newClothId'].unique())
len(unique_cloth_id)

59819

In [124]:
a = cloth_meta.newClothId.isin(unique_cloth_id)
cloth_meta = cloth_meta[a]
cloth_meta.index = range(len(cloth_meta))

In [125]:
cloth_meta.idx = cloth_meta.idx.astype('int')

In [126]:
transaction_source = [(transaction['userId'][i], transaction['idx'][i]) for i in range(transaction.shape[0])]

In [127]:
print('userId의 유니크 개수 :', len(transaction.userId.unique()))
print('clothId의 유니크 개수 :', len(transaction.idx.unique()))
print('user테이블의 userId의 유니크 개수 :', len(user_meta.userId.unique()))
print('cloth테이블의 clothId의 유니크 개수 :', len(cloth_meta.idx.unique()))

userId의 유니크 개수 : 131315
clothId의 유니크 개수 : 59819
user테이블의 userId의 유니크 개수 : 131315
cloth테이블의 clothId의 유니크 개수 : 59819


In [128]:
user_meta

Unnamed: 0,userId,size,bright,color,thickness
0,858064,1.250000,1.000000,1.0,2.750000
1,858065,1.600000,1.800000,2.0,2.200000
2,858067,1.750000,2.000000,2.0,1.500000
3,858070,1.000000,2.000000,1.0,3.000000
4,858071,2.000000,1.800000,2.0,2.000000
...,...,...,...,...,...
131310,1416935,1.866667,2.000000,1.6,2.133333
131311,1416936,1.333333,1.666667,2.0,1.333333
131312,1416938,1.600000,2.200000,2.2,2.200000
131313,1416946,2.000000,2.000000,2.0,2.166667


In [129]:
user_features_source = [(user_meta['userId'][i],
                        [user_meta['size'][i], user_meta['bright'][i], user_meta['color'][i],
                        user_meta['thickness'][i]]) for i in range(user_meta.shape[0])]

In [130]:
cloth_features_source = [(cloth_meta['idx'][i],
                        [cloth_meta['fit'][i], cloth_meta['feeling'][i], cloth_meta['stretch'][i],
                        cloth_meta['visibility'][i], cloth_meta['thickness'][i]]) for i in range(cloth_meta.shape[0])]

In [131]:
dataset = Dataset()
dataset.fit(users=transaction['userId'].unique(),
           items=transaction['idx'].unique(),
           user_features=user_meta[user_meta.columns[1:]].values.flatten(),
           item_features=cloth_meta[cloth_meta.columns[1:]].values.flatten())

In [132]:
interactions, weights = dataset.build_interactions(transaction_source)

In [133]:
user_features = dataset.build_user_features(user_features_source)

In [134]:
cloth_features = dataset.build_item_features(cloth_features_source)

In [135]:
train, test = random_train_test_split(interactions, test_percentage=0.2)
train, test = train.tocsr().tocoo(), test.tocsr().tocoo()
train_weights = train.multiply(weights).tocoo()
test_weights = test.multiply(weights).tocoo()

In [30]:
# no_components: 10
# learning_rate : 0.05
trials = Trials()
space = [hp.choice('no_components', range(10, 21, 5)),
         hp.uniform('learning_rate', 0.05, 0.1)]

In [85]:
# Define Objective Function
def objective(params):
    no_components, learning_rate = params
    model = LightFM(no_components=no_components,
                    learning_schedule='adagrad',
                    loss='warp',
                    learning_rate=learning_rate,
                    random_state=0)

    model.fit(interactions=train,
              item_features=cloth_features,
              user_features=user_features,
              sample_weight=train_weights,
              epochs=3,
              verbose=False)

    test_precision = precision_at_k(model, test, k=5, item_features=cloth_features, user_features=user_features).mean()
    print("no_comp: {}, lrn_rate: {:.5f}, precision: {:.5f}".format(
      no_components, learning_rate, test_precision))
    # test_auc = auc_score(model, test, item_features=item_features).mean()
    output = -test_precision

    if np.abs(output+1) < 0.01 or output < -1.0:
        output = 0.0

    return output

In [None]:
best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=10, trials=trials)

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]

In [31]:
learning_rate = round(best_params['learning_rate'], 3)
no_components = best_params['no_components']
print(learning_rate, no_components)

0.05 2


In [136]:
learning_rate = 0.03
no_components = 3

In [137]:
model = LightFM(no_components=no_components,
                learning_schedule='adagrad',
                loss='warp',
                learning_rate=learning_rate,
                random_state=0)

model.fit(interactions=interactions,
          item_features=cloth_features,
          user_features=user_features,
          sample_weight=weights,
          epochs=10,
          verbose=True)

Epoch: 100%|██████████| 10/10 [00:06<00:00,  1.61it/s]


<lightfm.lightfm.LightFM at 0x1f365e43760>

In [138]:
# 모델 저장하기
import pickle
with open('model_style_top.pickle', 'wb') as fw:
    pickle.dump(model, fw)

In [139]:
# 모델 불러오기
import pickle
with open('model_style_top.pickle', 'rb') as f:
    model_top = pickle.load(f)

In [140]:
def sample_recommendation(model, idx, user_ids):
    cnt = 1
    for user_id in user_ids:
        if cnt==1:
            scores = model.predict(int(user_meta[user_meta.userId==user_id].index[0]), np.arange(len(cloth_meta)))
        else:
            scores += model.predict(int(user_meta[user_meta.userId==user_id].index[0]), np.arange(len(cloth_meta)))
    
    top_items = idx[np.argsort(-scores)]
    
    sub_clothes = []

    for x in top_items[:10]:
        sub_clothes.append(x)
    
    rec_clothes = []
    
    for i in sub_clothes:
        rec_clothes.append(cloth_meta[cloth_meta.idx==i].newClothId.iloc[0])
    
    return rec_clothes

In [141]:
rec_size_top = sample_recommendation(model_top, cloth_meta.idx, real_user)
rec_size_top

[2139133140300,
 2139144142723,
 1440587133960,
 1282892131862,
 1938650133697,
 1572660131416,
 1618882132452,
 1873849131956,
 1045284131496,
 1804104134824]

In [142]:
real_user

[1070345, 882478, 1115384]

In [143]:
cloth_meta.insert(7, 'what', 11, True)
user_meta.insert(5, 'what', 11, True)

In [144]:
cloth_meta = cloth_meta[["newClothId", "idx", 'what']]
cloth_meta

Unnamed: 0,newClothId,idx,what
0,1440513131686,50031,11
1,1930909131765,7019,11
2,1930909131764,7840,11
3,1930909131766,7374,11
4,1931411131771,9496,11
...,...,...,...
59814,1281638267701,31274,11
59815,1622670267773,9458,11
59816,1556851267795,47549,11
59817,1651849267836,3995,11


In [145]:
cloth_meta.to_json('style_top_cloth_meta.json', orient='records', force_ascii=False)
user_meta.to_json('style_top_user_meta.json', orient='records', force_ascii=False)

In [146]:
user_meta

Unnamed: 0,userId,size,bright,color,thickness,what
0,858064,1.250000,1.000000,1.0,2.750000,11
1,858065,1.600000,1.800000,2.0,2.200000,11
2,858067,1.750000,2.000000,2.0,1.500000,11
3,858070,1.000000,2.000000,1.0,3.000000,11
4,858071,2.000000,1.800000,2.0,2.000000,11
...,...,...,...,...,...,...
131310,1416935,1.866667,2.000000,1.6,2.133333,11
131311,1416936,1.333333,1.666667,2.0,1.333333,11
131312,1416938,1.600000,2.200000,2.2,2.200000,11
131313,1416946,2.000000,2.000000,2.0,2.166667,11


# Outer

In [147]:
transaction = pd.read_json('./transaction_outer.json')
cloth_meta = pd.read_json('./final_outer.json')
user_meta = pd.read_json('./user_outer.json')
transaction = transaction[['userId', 'newClothId', 'shopCnt']]

In [148]:
transaction = transaction[transaction.shopCnt > 1]
transaction.shape

(171253, 3)

In [149]:
unique_id_cloth_meta = list(set(cloth_meta.newClothId))
transaction=transaction.loc[transaction.newClothId.isin(unique_id_cloth_meta),:]

In [150]:
a = transaction[['newClothId']]
a.insert(1, 'idx', 0, True)
a = a.drop_duplicates(['newClothId'])
a.idx = range(len(a))

In [151]:
transaction = pd.merge(transaction, a, how="left", on="newClothId")

In [152]:
user_meta = user_meta[style_user_column]

In [153]:
unique_id_user_meta = list(set(transaction.userId))
user_meta=user_meta.loc[user_meta.userId.isin(unique_id_user_meta),:]
user_meta = user_meta.reset_index(drop=True)
user_meta

Unnamed: 0,userId,size,bright,color,thickness
0,488503,2.0,2.000000,2.000000,0.000000
1,488504,2.0,2.000000,3.000000,0.000000
2,488506,2.0,2.000000,2.000000,3.000000
3,488507,2.0,2.000000,1.666667,2.000000
4,488509,1.0,2.000000,2.000000,1.000000
...,...,...,...,...,...
93245,858056,2.2,2.000000,1.400000,1.600000
93246,858057,2.0,2.333333,1.000000,2.333333
93247,858058,2.0,3.000000,2.000000,3.000000
93248,858060,1.0,1.000000,1.000000,1.000000


In [154]:
new_user = pd.DataFrame(user_meta.loc[0]).transpose()
new_user_idx = new_user.index
new_user.loc[new_user_idx, ('userId')] = 8299
new_user.loc[new_user_idx, ('size')] = 0.3
new_user.loc[new_user_idx, ('bright')] = 0.3
new_user.loc[new_user_idx, ('color')] = 0.4
new_user.loc[new_user_idx, ('thickness')] = 0
new_user = new_user[['userId',  'size', 'bright', 'color', 'thickness']]
new_user

Unnamed: 0,userId,size,bright,color,thickness
0,8299.0,0.3,0.3,0.4,0.0


In [155]:
from copy import deepcopy
sub_user = deepcopy(user_meta)

In [156]:
sub_user.size = sub_user.apply(lambda x: abs(x.size-new_user['size'].iloc[0]), axis=1)
sub_user.bright = sub_user.apply(lambda x: abs(x.bright-new_user['bright'].iloc[0]), axis=1)
sub_user.color = sub_user.apply(lambda x: abs(x.color-new_user['color'].iloc[0]), axis=1)
sub_user.thickness = sub_user.apply(lambda x: abs(x.thickness-new_user['thickness'].iloc[0]), axis=1)

In [157]:
sub_col = list(sub_user.columns)[1:]

In [158]:
sub_user.insert(1, 'favorSum', 0, True)
sub_user.favorSum = sum(sub_user[col_i] for col_i in sub_col)
sub_user = sub_user.sort_values('favorSum')

In [159]:
real_user = []
for i in range(3):
    real_user.append(sub_user.iloc[i].userId)
real_user = [int(i) for i in real_user]
print(real_user)

[774953, 774971, 604362]


In [160]:
cloth_meta = cloth_meta[style_cloth_column]

In [161]:
cloth_meta

Unnamed: 0,newClothId,fit,feeling,stretch,visibility,thickness
0,2401813,0,0,0,0,0
1,234005278848,2,3,3,5,3
2,2379678,0,0,0,0,0
3,2360803,0,0,0,0,0
4,2354237,0,0,0,0,0
...,...,...,...,...,...,...
85636,2322135130506,0,0,0,0,0
85637,2322135130507,0,0,0,0,0
85638,2322135130508,0,0,0,0,0
85639,2352578,0,0,0,0,0


In [162]:
cloth_meta = pd.merge(cloth_meta, a, how="left", on="newClothId")

In [163]:
unique_cloth_id = list(transaction['newClothId'].unique())
len(unique_cloth_id)

25916

In [164]:
a = cloth_meta.newClothId.isin(unique_cloth_id)
cloth_meta = cloth_meta[a]
cloth_meta.index = range(len(cloth_meta))

In [165]:
cloth_meta.idx = cloth_meta.idx.astype('int')

In [166]:
transaction_source = [(transaction['userId'][i], transaction['idx'][i]) for i in range(transaction.shape[0])]

In [167]:
print('userId의 유니크 개수 :', len(transaction.userId.unique()))
print('clothId의 유니크 개수 :', len(transaction.idx.unique()))
print('user테이블의 userId의 유니크 개수 :', len(user_meta.userId.unique()))
print('cloth테이블의 clothId의 유니크 개수 :', len(cloth_meta.idx.unique()))

userId의 유니크 개수 : 93250
clothId의 유니크 개수 : 25916
user테이블의 userId의 유니크 개수 : 93250
cloth테이블의 clothId의 유니크 개수 : 25916


In [168]:
user_meta

Unnamed: 0,userId,size,bright,color,thickness
0,488503,2.0,2.000000,2.000000,0.000000
1,488504,2.0,2.000000,3.000000,0.000000
2,488506,2.0,2.000000,2.000000,3.000000
3,488507,2.0,2.000000,1.666667,2.000000
4,488509,1.0,2.000000,2.000000,1.000000
...,...,...,...,...,...
93245,858056,2.2,2.000000,1.400000,1.600000
93246,858057,2.0,2.333333,1.000000,2.333333
93247,858058,2.0,3.000000,2.000000,3.000000
93248,858060,1.0,1.000000,1.000000,1.000000


In [169]:
user_features_source = [(user_meta['userId'][i],
                        [user_meta['size'][i], user_meta['bright'][i], user_meta['color'][i],
                        user_meta['thickness'][i]]) for i in range(user_meta.shape[0])]

In [170]:
cloth_features_source = [(cloth_meta['idx'][i],
                        [cloth_meta['fit'][i], cloth_meta['feeling'][i], cloth_meta['stretch'][i],
                        cloth_meta['visibility'][i], cloth_meta['thickness'][i]]) for i in range(cloth_meta.shape[0])]

In [171]:
dataset = Dataset()
dataset.fit(users=transaction['userId'].unique(),
           items=transaction['idx'].unique(),
           user_features=user_meta[user_meta.columns[1:]].values.flatten(),
           item_features=cloth_meta[cloth_meta.columns[1:]].values.flatten())

In [172]:
interactions, weights = dataset.build_interactions(transaction_source)

In [173]:
user_features = dataset.build_user_features(user_features_source)

In [174]:
cloth_features = dataset.build_item_features(cloth_features_source)

In [175]:
train, test = random_train_test_split(interactions, test_percentage=0.2)
train, test = train.tocsr().tocoo(), test.tocsr().tocoo()
train_weights = train.multiply(weights).tocoo()
test_weights = test.multiply(weights).tocoo()

In [30]:
# no_components: 10
# learning_rate : 0.05
trials = Trials()
space = [hp.choice('no_components', range(10, 21, 5)),
         hp.uniform('learning_rate', 0.05, 0.1)]

In [85]:
# Define Objective Function
def objective(params):
    no_components, learning_rate = params
    model = LightFM(no_components=no_components,
                    learning_schedule='adagrad',
                    loss='warp',
                    learning_rate=learning_rate,
                    random_state=0)

    model.fit(interactions=train,
              item_features=cloth_features,
              user_features=user_features,
              sample_weight=train_weights,
              epochs=3,
              verbose=False)

    test_precision = precision_at_k(model, test, k=5, item_features=cloth_features, user_features=user_features).mean()
    print("no_comp: {}, lrn_rate: {:.5f}, precision: {:.5f}".format(
      no_components, learning_rate, test_precision))
    # test_auc = auc_score(model, test, item_features=item_features).mean()
    output = -test_precision

    if np.abs(output+1) < 0.01 or output < -1.0:
        output = 0.0

    return output

In [None]:
best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=10, trials=trials)

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]

In [31]:
learning_rate = round(best_params['learning_rate'], 3)
no_components = best_params['no_components']
print(learning_rate, no_components)

0.05 2


In [176]:
learning_rate = 0.03
no_components = 3

In [177]:
model = LightFM(no_components=no_components,
                learning_schedule='adagrad',
                loss='warp',
                learning_rate=learning_rate,
                random_state=0)

model.fit(interactions=interactions,
          item_features=cloth_features,
          user_features=user_features,
          sample_weight=weights,
          epochs=10,
          verbose=True)

Epoch: 100%|██████████| 10/10 [00:03<00:00,  3.06it/s]


<lightfm.lightfm.LightFM at 0x1f365e5cb20>

In [178]:
# 모델 저장하기
import pickle
with open('model_style_outer.pickle', 'wb') as fw:
    pickle.dump(model, fw)

In [179]:
# 모델 불러오기
import pickle
with open('model_style_outer.pickle', 'rb') as f:
    model_outer = pickle.load(f)

In [180]:
def sample_recommendation(model, idx, user_ids):
    cnt = 1
    for user_id in user_ids:
        if cnt==1:
            scores = model.predict(int(user_meta[user_meta.userId==user_id].index[0]), np.arange(len(cloth_meta)))
        else:
            scores += model.predict(int(user_meta[user_meta.userId==user_id].index[0]), np.arange(len(cloth_meta)))
    
    top_items = idx[np.argsort(-scores)]
    
    sub_clothes = []

    for x in top_items[:10]:
        sub_clothes.append(x)
    
    rec_clothes = []
    
    for i in sub_clothes:
        rec_clothes.append(cloth_meta[cloth_meta.idx==i].newClothId.iloc[0])
    
    return rec_clothes

In [181]:
rec_size_top = sample_recommendation(model_outer, cloth_meta.idx, real_user)
rec_size_top

[177840683604,
 238071082834,
 157135683282,
 231905785481,
 132418381736,
 209117486230,
 1736613110797,
 1855501110996,
 2339203110659,
 835897085946]

In [182]:
real_user

[774953, 774971, 604362]

In [183]:
cloth_meta.insert(7, 'what', 12, True)
user_meta.insert(5, 'what', 12, True)

In [184]:
cloth_meta = cloth_meta[["newClothId", "idx", 'what']]
cloth_meta

Unnamed: 0,newClothId,idx,what
0,234005278848,25240,12
1,208155079211,20700,12
2,208155079212,15426,12
3,208155079213,23065,12
4,208155079214,1832,12
...,...,...,...
25911,2080918130502,10939,12
25912,2080918130504,7261,12
25913,2322135130506,21919,12
25914,2322135130507,20550,12


In [185]:
cloth_meta.to_json('style_outer_cloth_meta.json', orient='records', force_ascii=False)
user_meta.to_json('style_outer_user_meta.json', orient='records', force_ascii=False)

In [186]:
user_meta

Unnamed: 0,userId,size,bright,color,thickness,what
0,488503,2.0,2.000000,2.000000,0.000000,12
1,488504,2.0,2.000000,3.000000,0.000000,12
2,488506,2.0,2.000000,2.000000,3.000000,12
3,488507,2.0,2.000000,1.666667,2.000000,12
4,488509,1.0,2.000000,2.000000,1.000000,12
...,...,...,...,...,...,...
93245,858056,2.2,2.000000,1.400000,1.600000,12
93246,858057,2.0,2.333333,1.000000,2.333333,12
93247,858058,2.0,3.000000,2.000000,3.000000,12
93248,858060,1.0,1.000000,1.000000,1.000000,12


# Pants

In [187]:
transaction = pd.read_json('./transaction_pants.json')
cloth_meta = pd.read_json('./final_pants.json')
user_meta = pd.read_json('./user_pants.json')
transaction = transaction[['userId', 'newClothId', 'shopCnt']]

In [188]:
transaction = transaction[transaction.shopCnt > 1]
transaction.shape

(273862, 3)

In [189]:
unique_id_cloth_meta = list(set(cloth_meta.newClothId))
transaction=transaction.loc[transaction.newClothId.isin(unique_id_cloth_meta),:]

In [190]:
a = transaction[['newClothId']]
a.insert(1, 'idx', 0, True)
a = a.drop_duplicates(['newClothId'])
a.idx = range(len(a))

In [191]:
transaction = pd.merge(transaction, a, how="left", on="newClothId")

In [192]:
user_meta = user_meta[style_user_column]

In [193]:
unique_id_user_meta = list(set(transaction.userId))
user_meta=user_meta.loc[user_meta.userId.isin(unique_id_user_meta),:]
user_meta = user_meta.reset_index(drop=True)
user_meta

Unnamed: 0,userId,size,bright,color,thickness
0,14135,1.428571,2.000000,2.000000,2.000000
1,14141,1.937500,2.000000,2.000000,2.250000
2,14146,1.000000,2.000000,2.000000,2.000000
3,14148,2.000000,2.000000,2.000000,2.000000
4,14158,1.870968,1.870968,2.000000,2.096774
...,...,...,...,...,...
115960,488479,1.884615,1.846154,1.846154,2.000000
115961,488483,1.000000,2.200000,1.000000,1.400000
115962,488490,1.583333,2.000000,1.916667,2.083333
115963,488491,2.000000,1.000000,2.000000,2.000000


In [194]:
new_user = pd.DataFrame(user_meta.loc[0]).transpose()
new_user_idx = new_user.index
new_user.loc[new_user_idx, ('userId')] = 8299
new_user.loc[new_user_idx, ('size')] = 0.3
new_user.loc[new_user_idx, ('bright')] = 0.3
new_user.loc[new_user_idx, ('color')] = 0.4
new_user.loc[new_user_idx, ('thickness')] = 0
new_user = new_user[['userId',  'size', 'bright', 'color', 'thickness']]
new_user

Unnamed: 0,userId,size,bright,color,thickness
0,8299.0,0.3,0.3,0.4,0.0


In [195]:
from copy import deepcopy
sub_user = deepcopy(user_meta)

In [196]:
sub_user.size = sub_user.apply(lambda x: abs(x.size-new_user['size'].iloc[0]), axis=1)
sub_user.bright = sub_user.apply(lambda x: abs(x.bright-new_user['bright'].iloc[0]), axis=1)
sub_user.color = sub_user.apply(lambda x: abs(x.color-new_user['color'].iloc[0]), axis=1)
sub_user.thickness = sub_user.apply(lambda x: abs(x.thickness-new_user['thickness'].iloc[0]), axis=1)

In [197]:
sub_col = list(sub_user.columns)[1:]

In [198]:
sub_user.insert(1, 'favorSum', 0, True)
sub_user.favorSum = sum(sub_user[col_i] for col_i in sub_col)
sub_user = sub_user.sort_values('favorSum')

In [199]:
real_user = []
for i in range(3):
    real_user.append(sub_user.iloc[i].userId)
real_user = [int(i) for i in real_user]
print(real_user)

[382066, 42286, 31870]


In [200]:
cloth_meta = cloth_meta[style_cloth_column]

In [201]:
cloth_meta

Unnamed: 0,newClothId,fit,feeling,stretch,visibility,thickness
0,1971752,0,0,0,0,0
1,726566,0,0,0,0,0
2,2213618,0,0,0,0,0
3,2265443,0,0,0,0,0
4,2005302,0,0,0,0,0
...,...,...,...,...,...,...
106873,1889420,0,0,0,0,0
106874,1275944,0,0,0,0,0
106875,1889504,0,0,0,0,0
106876,552678,0,0,0,0,0


In [202]:
cloth_meta = pd.merge(cloth_meta, a, how="left", on="newClothId")

In [203]:
unique_cloth_id = list(transaction['newClothId'].unique())
len(unique_cloth_id)

33530

In [204]:
a = cloth_meta.newClothId.isin(unique_cloth_id)
cloth_meta = cloth_meta[a]
cloth_meta.index = range(len(cloth_meta))

In [205]:
cloth_meta.idx = cloth_meta.idx.astype('int')

In [206]:
transaction_source = [(transaction['userId'][i], transaction['idx'][i]) for i in range(transaction.shape[0])]

In [207]:
print('userId의 유니크 개수 :', len(transaction.userId.unique()))
print('clothId의 유니크 개수 :', len(transaction.idx.unique()))
print('user테이블의 userId의 유니크 개수 :', len(user_meta.userId.unique()))
print('cloth테이블의 clothId의 유니크 개수 :', len(cloth_meta.idx.unique()))

userId의 유니크 개수 : 115965
clothId의 유니크 개수 : 33530
user테이블의 userId의 유니크 개수 : 115965
cloth테이블의 clothId의 유니크 개수 : 33530


In [208]:
user_meta

Unnamed: 0,userId,size,bright,color,thickness
0,14135,1.428571,2.000000,2.000000,2.000000
1,14141,1.937500,2.000000,2.000000,2.250000
2,14146,1.000000,2.000000,2.000000,2.000000
3,14148,2.000000,2.000000,2.000000,2.000000
4,14158,1.870968,1.870968,2.000000,2.096774
...,...,...,...,...,...
115960,488479,1.884615,1.846154,1.846154,2.000000
115961,488483,1.000000,2.200000,1.000000,1.400000
115962,488490,1.583333,2.000000,1.916667,2.083333
115963,488491,2.000000,1.000000,2.000000,2.000000


In [209]:
user_features_source = [(user_meta['userId'][i],
                        [user_meta['size'][i], user_meta['bright'][i], user_meta['color'][i],
                        user_meta['thickness'][i]]) for i in range(user_meta.shape[0])]

In [210]:
cloth_features_source = [(cloth_meta['idx'][i],
                        [cloth_meta['fit'][i], cloth_meta['feeling'][i], cloth_meta['stretch'][i],
                        cloth_meta['visibility'][i], cloth_meta['thickness'][i]]) for i in range(cloth_meta.shape[0])]

In [211]:
dataset = Dataset()
dataset.fit(users=transaction['userId'].unique(),
           items=transaction['idx'].unique(),
           user_features=user_meta[user_meta.columns[1:]].values.flatten(),
           item_features=cloth_meta[cloth_meta.columns[1:]].values.flatten())

In [212]:
interactions, weights = dataset.build_interactions(transaction_source)

In [213]:
user_features = dataset.build_user_features(user_features_source)

In [214]:
cloth_features = dataset.build_item_features(cloth_features_source)

In [215]:
train, test = random_train_test_split(interactions, test_percentage=0.2)
train, test = train.tocsr().tocoo(), test.tocsr().tocoo()
train_weights = train.multiply(weights).tocoo()
test_weights = test.multiply(weights).tocoo()

In [30]:
# no_components: 10
# learning_rate : 0.05
trials = Trials()
space = [hp.choice('no_components', range(10, 21, 5)),
         hp.uniform('learning_rate', 0.05, 0.1)]

In [85]:
# Define Objective Function
def objective(params):
    no_components, learning_rate = params
    model = LightFM(no_components=no_components,
                    learning_schedule='adagrad',
                    loss='warp',
                    learning_rate=learning_rate,
                    random_state=0)

    model.fit(interactions=train,
              item_features=cloth_features,
              user_features=user_features,
              sample_weight=train_weights,
              epochs=3,
              verbose=False)

    test_precision = precision_at_k(model, test, k=5, item_features=cloth_features, user_features=user_features).mean()
    print("no_comp: {}, lrn_rate: {:.5f}, precision: {:.5f}".format(
      no_components, learning_rate, test_precision))
    # test_auc = auc_score(model, test, item_features=item_features).mean()
    output = -test_precision

    if np.abs(output+1) < 0.01 or output < -1.0:
        output = 0.0

    return output

In [None]:
best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=10, trials=trials)

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]

In [31]:
learning_rate = round(best_params['learning_rate'], 3)
no_components = best_params['no_components']
print(learning_rate, no_components)

0.05 2


In [216]:
learning_rate = 0.03
no_components = 3

In [217]:
model = LightFM(no_components=no_components,
                learning_schedule='adagrad',
                loss='warp',
                learning_rate=learning_rate,
                random_state=0)

model.fit(interactions=interactions,
          item_features=cloth_features,
          user_features=user_features,
          sample_weight=weights,
          epochs=10,
          verbose=True)

Epoch: 100%|██████████| 10/10 [00:05<00:00,  1.79it/s]


<lightfm.lightfm.LightFM at 0x1f305516b80>

In [218]:
# 모델 저장하기
import pickle
with open('model_style_pants.pickle', 'wb') as fw:
    pickle.dump(model, fw)

In [219]:
# 모델 불러오기
import pickle
with open('model_style_pants.pickle', 'rb') as f:
    model_pants = pickle.load(f)

In [220]:
def sample_recommendation(model, idx, user_ids):
    cnt = 1
    for user_id in user_ids:
        if cnt==1:
            scores = model.predict(int(user_meta[user_meta.userId==user_id].index[0]), np.arange(len(cloth_meta)))
        else:
            scores += model.predict(int(user_meta[user_meta.userId==user_id].index[0]), np.arange(len(cloth_meta)))
    
    top_items = idx[np.argsort(-scores)]
    
    sub_clothes = []

    for x in top_items[:10]:
        sub_clothes.append(x)
    
    rec_clothes = []
    
    for i in sub_clothes:
        rec_clothes.append(cloth_meta[cloth_meta.idx==i].newClothId.iloc[0])
    
    return rec_clothes

In [221]:
rec_size_top = sample_recommendation(model_pants, cloth_meta.idx, real_user)
rec_size_top

[17611826560,
 214926918340,
 141285112055,
 23422909921,
 162996026109,
 166869912945,
 21801229810,
 131203916970,
 164792415298,
 174162315741]

In [222]:
real_user

[382066, 42286, 31870]

In [223]:
cloth_meta.insert(7, 'what', 13, True)
user_meta.insert(5, 'what', 13, True)

In [224]:
cloth_meta = cloth_meta[["newClothId", "idx", 'what']]
cloth_meta

Unnamed: 0,newClothId,idx,what
0,16225354884,11720,13
1,14988915210,15601,13
2,19409615513,29005,13
3,20193975542,22897,13
4,18466875743,17729,13
...,...,...,...
33525,180323169094,6877,13
33526,182037869103,19148,13
33527,194119769137,18084,13
33528,179115969145,16294,13


In [225]:
cloth_meta.to_json('style_pants_cloth_meta.json', orient='records', force_ascii=False)
user_meta.to_json('style_pants_user_meta.json', orient='records', force_ascii=False)

In [226]:
user_meta

Unnamed: 0,userId,size,bright,color,thickness,what
0,14135,1.428571,2.000000,2.000000,2.000000,13
1,14141,1.937500,2.000000,2.000000,2.250000,13
2,14146,1.000000,2.000000,2.000000,2.000000,13
3,14148,2.000000,2.000000,2.000000,2.000000,13
4,14158,1.870968,1.870968,2.000000,2.096774,13
...,...,...,...,...,...,...
115960,488479,1.884615,1.846154,1.846154,2.000000,13
115961,488483,1.000000,2.200000,1.000000,1.400000,13
115962,488490,1.583333,2.000000,1.916667,2.083333,13
115963,488491,2.000000,1.000000,2.000000,2.000000,13


# Onepiece

In [227]:
transaction = pd.read_json('./transaction_onepiece.json')
cloth_meta = pd.read_json('./final_onepiece.json')
user_meta = pd.read_json('./user_onepiece.json')
transaction = transaction[['userId', 'newClothId', 'shopCnt']]

In [228]:
unique_id_cloth_meta = list(set(cloth_meta.newClothId))
transaction=transaction.loc[transaction.newClothId.isin(unique_id_cloth_meta),:]

In [229]:
a = transaction[['newClothId']]
a.insert(1, 'idx', 0, True)
a = a.drop_duplicates(['newClothId'])
a.idx = range(len(a))

In [230]:
transaction = pd.merge(transaction, a, how="left", on="newClothId")

In [231]:
user_meta = user_meta[style_user_column]

In [232]:
unique_id_user_meta = list(set(transaction.userId))
user_meta=user_meta.loc[user_meta.userId.isin(unique_id_user_meta),:]
user_meta = user_meta.reset_index(drop=True)
user_meta

Unnamed: 0,userId,size,bright,color,thickness
0,8297,2.0,1.0,2.0,0
1,8298,2.0,3.0,1.0,0
2,8299,2.0,1.0,1.0,0
3,8300,2.0,1.0,2.0,0
4,8301,1.0,1.0,1.0,0
...,...,...,...,...,...
5832,14129,1.5,1.5,1.0,0
5833,14130,2.0,2.0,2.0,0
5834,14131,2.0,2.0,2.0,0
5835,14132,2.0,3.0,1.0,0


In [233]:
new_user = pd.DataFrame(user_meta.loc[0]).transpose()
new_user_idx = new_user.index
new_user.loc[new_user_idx, ('userId')] = 8299
new_user.loc[new_user_idx, ('size')] = 0.3
new_user.loc[new_user_idx, ('bright')] = 0.3
new_user.loc[new_user_idx, ('color')] = 0.4
new_user.loc[new_user_idx, ('thickness')] = 0
new_user = new_user[['userId',  'size', 'bright', 'color', 'thickness']]
new_user

Unnamed: 0,userId,size,bright,color,thickness
0,8299.0,0.3,0.3,0.4,0.0


In [234]:
from copy import deepcopy
sub_user = deepcopy(user_meta)

In [235]:
sub_user.size = sub_user.apply(lambda x: abs(x.size-new_user['size'].iloc[0]), axis=1)
sub_user.bright = sub_user.apply(lambda x: abs(x.bright-new_user['bright'].iloc[0]), axis=1)
sub_user.color = sub_user.apply(lambda x: abs(x.color-new_user['color'].iloc[0]), axis=1)
sub_user.thickness = sub_user.apply(lambda x: abs(x.thickness-new_user['thickness'].iloc[0]), axis=1)

In [236]:
sub_col = list(sub_user.columns)[1:]

In [237]:
sub_user.insert(1, 'favorSum', 0, True)
sub_user.favorSum = sum(sub_user[col_i] for col_i in sub_col)
sub_user = sub_user.sort_values('favorSum')

In [238]:
real_user = []
for i in range(3):
    real_user.append(sub_user.iloc[i].userId)
real_user = [int(i) for i in real_user]
print(real_user)

[8936, 12844, 13730]


In [239]:
cloth_meta = cloth_meta[style_cloth_column]

In [240]:
cloth_meta

Unnamed: 0,newClothId,fit,feeling,stretch,visibility,thickness
0,1475941,0,0,0,0,0
1,1995082,3,1,1,2,1
2,1723870,0,0,0,0,0
3,2197982,0,0,0,0,0
4,2247825,0,0,0,0,0
...,...,...,...,...,...,...
9870,2324195,0,0,0,0,0
9871,14649944700,0,0,0,0,0
9872,2352427,0,0,0,0,0
9873,1457626,0,0,0,0,0


In [241]:
cloth_meta = pd.merge(cloth_meta, a, how="left", on="newClothId")

In [242]:
unique_cloth_id = list(transaction['newClothId'].unique())
len(unique_cloth_id)

2047

In [243]:
a = cloth_meta.newClothId.isin(unique_cloth_id)
cloth_meta = cloth_meta[a]
cloth_meta.index = range(len(cloth_meta))

In [244]:
cloth_meta.idx = cloth_meta.idx.astype('int')

In [245]:
transaction_source = [(transaction['userId'][i], transaction['idx'][i]) for i in range(transaction.shape[0])]

In [246]:
print('userId의 유니크 개수 :', len(transaction.userId.unique()))
print('clothId의 유니크 개수 :', len(transaction.idx.unique()))
print('user테이블의 userId의 유니크 개수 :', len(user_meta.userId.unique()))
print('cloth테이블의 clothId의 유니크 개수 :', len(cloth_meta.idx.unique()))

userId의 유니크 개수 : 5837
clothId의 유니크 개수 : 2047
user테이블의 userId의 유니크 개수 : 5837
cloth테이블의 clothId의 유니크 개수 : 2047


In [247]:
user_meta

Unnamed: 0,userId,size,bright,color,thickness
0,8297,2.0,1.0,2.0,0
1,8298,2.0,3.0,1.0,0
2,8299,2.0,1.0,1.0,0
3,8300,2.0,1.0,2.0,0
4,8301,1.0,1.0,1.0,0
...,...,...,...,...,...
5832,14129,1.5,1.5,1.0,0
5833,14130,2.0,2.0,2.0,0
5834,14131,2.0,2.0,2.0,0
5835,14132,2.0,3.0,1.0,0


In [248]:
user_features_source = [(user_meta['userId'][i],
                        [user_meta['size'][i], user_meta['bright'][i], user_meta['color'][i],
                        user_meta['thickness'][i]]) for i in range(user_meta.shape[0])]

In [249]:
cloth_features_source = [(cloth_meta['idx'][i],
                        [cloth_meta['fit'][i], cloth_meta['feeling'][i], cloth_meta['stretch'][i],
                        cloth_meta['visibility'][i], cloth_meta['thickness'][i]]) for i in range(cloth_meta.shape[0])]

In [250]:
dataset = Dataset()
dataset.fit(users=transaction['userId'].unique(),
           items=transaction['idx'].unique(),
           user_features=user_meta[user_meta.columns[1:]].values.flatten(),
           item_features=cloth_meta[cloth_meta.columns[1:]].values.flatten())

In [251]:
interactions, weights = dataset.build_interactions(transaction_source)

In [252]:
user_features = dataset.build_user_features(user_features_source)

In [253]:
cloth_features = dataset.build_item_features(cloth_features_source)

In [254]:
train, test = random_train_test_split(interactions, test_percentage=0.2)
train, test = train.tocsr().tocoo(), test.tocsr().tocoo()
train_weights = train.multiply(weights).tocoo()
test_weights = test.multiply(weights).tocoo()

In [255]:
# no_components: 10
# learning_rate : 0.05
trials = Trials()
space = [hp.choice('no_components', range(10, 21, 5)),
         hp.uniform('learning_rate', 0.05, 0.1)]

In [256]:
# Define Objective Function
def objective(params):
    no_components, learning_rate = params
    model = LightFM(no_components=no_components,
                    learning_schedule='adagrad',
                    loss='warp',
                    learning_rate=learning_rate,
                    random_state=0)

    model.fit(interactions=train,
              item_features=cloth_features,
              user_features=user_features,
              sample_weight=train_weights,
              epochs=3,
              verbose=False)

    test_precision = precision_at_k(model, test, k=5, item_features=cloth_features, user_features=user_features).mean()
    print("no_comp: {}, lrn_rate: {:.5f}, precision: {:.5f}".format(
      no_components, learning_rate, test_precision))
    # test_auc = auc_score(model, test, item_features=item_features).mean()
    output = -test_precision

    if np.abs(output+1) < 0.01 or output < -1.0:
        output = 0.0

    return output

In [257]:
best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=10, trials=trials)

no_comp: 15, lrn_rate: 0.09908, precision: 0.01002    
no_comp: 20, lrn_rate: 0.06562, precision: 0.00957                                 
no_comp: 20, lrn_rate: 0.08387, precision: 0.00883                                 
no_comp: 20, lrn_rate: 0.07244, precision: 0.00868                                 
no_comp: 10, lrn_rate: 0.09425, precision: 0.00823                                 
no_comp: 10, lrn_rate: 0.05816, precision: 0.00987                                 
no_comp: 15, lrn_rate: 0.09676, precision: 0.00942                                 
no_comp: 20, lrn_rate: 0.09489, precision: 0.00972                                 
no_comp: 20, lrn_rate: 0.05142, precision: 0.00927                                 
no_comp: 10, lrn_rate: 0.09477, precision: 0.00927                                 
100%|██████████| 10/10 [00:03<00:00,  2.92trial/s, best loss: -0.010022438131272793]


In [258]:
learning_rate = round(best_params['learning_rate'], 3)
no_components = best_params['no_components']
print(learning_rate, no_components)

0.099 1


In [259]:
model = LightFM(no_components=no_components,
                learning_schedule='adagrad',
                loss='warp',
                learning_rate=learning_rate,
                random_state=0)

model.fit(interactions=interactions,
          item_features=cloth_features,
          user_features=user_features,
          sample_weight=weights,
          epochs=10,
          verbose=True)

Epoch: 100%|██████████| 10/10 [00:00<00:00, 112.59it/s]


<lightfm.lightfm.LightFM at 0x1f30655e520>

In [260]:
# 모델 저장하기
import pickle
with open('model_style_onepiece.pickle', 'wb') as fw:
    pickle.dump(model, fw)

In [261]:
# 모델 불러오기
import pickle
with open('model_style_onepiece.pickle', 'rb') as f:
    model_onepiece = pickle.load(f)

In [262]:
def sample_recommendation(model, idx, user_ids):
    cnt = 1
    for user_id in user_ids:
        if cnt==1:
            scores = model.predict(int(user_meta[user_meta.userId==user_id].index[0]), np.arange(len(cloth_meta)))
        else:
            scores += model.predict(int(user_meta[user_meta.userId==user_id].index[0]), np.arange(len(cloth_meta)))
    
    top_items = idx[np.argsort(-scores)]
    
    sub_clothes = []

    for x in top_items[:10]:
        sub_clothes.append(x)
    
    rec_clothes = []
    
    for i in sub_clothes:
        rec_clothes.append(cloth_meta[cloth_meta.idx==i].newClothId.iloc[0])
    
    return rec_clothes

In [263]:
rec_size_top = sample_recommendation(model_onepiece, cloth_meta.idx, real_user)
rec_size_top

[14435003027,
 19186162758,
 19345122958,
 21430112964,
 23792962867,
 21101903074,
 20163412966,
 19186162757,
 18594522933,
 13633132986]

In [264]:
real_user

[8936, 12844, 13730]

In [265]:
cloth_meta.insert(7, 'what', 14, True)
user_meta.insert(5, 'what', 14, True)

In [266]:
cloth_meta = cloth_meta[["newClothId", "idx", 'what']]
cloth_meta

Unnamed: 0,newClothId,idx,what
0,14094532674,279,14
1,14163822698,1239,14
2,18962342742,78,14
3,14829532807,1135,14
4,13617652829,1543,14
...,...,...,...
2042,20653784693,298,14
2043,13105704695,1719,14
2044,14406794698,837,14
2045,19412954699,1421,14


In [267]:
cloth_meta.to_json('style_onepiece_cloth_meta.json', orient='records', force_ascii=False)
user_meta.to_json('style_onepiece_user_meta.json', orient='records', force_ascii=False)

In [268]:
user_meta

Unnamed: 0,userId,size,bright,color,thickness,what
0,8297,2.0,1.0,2.0,0,14
1,8298,2.0,3.0,1.0,0,14
2,8299,2.0,1.0,1.0,0,14
3,8300,2.0,1.0,2.0,0,14
4,8301,1.0,1.0,1.0,0,14
...,...,...,...,...,...,...
5832,14129,1.5,1.5,1.0,0,14
5833,14130,2.0,2.0,2.0,0,14
5834,14131,2.0,2.0,2.0,0,14
5835,14132,2.0,3.0,1.0,0,14


# Skirt

In [269]:
transaction = pd.read_json('./transaction_skirt.json')
cloth_meta = pd.read_json('./final_skirt.json')
user_meta = pd.read_json('./user_skirt.json')
transaction = transaction[['userId', 'newClothId', 'shopCnt']]

In [270]:
unique_id_cloth_meta = list(set(cloth_meta.newClothId))
transaction=transaction.loc[transaction.newClothId.isin(unique_id_cloth_meta),:]

In [271]:
a = transaction[['newClothId']]
a.insert(1, 'idx', 0, True)
a = a.drop_duplicates(['newClothId'])
a.idx = range(len(a))

In [272]:
transaction = pd.merge(transaction, a, how="left", on="newClothId")

In [273]:
user_meta = user_meta[style_user_column]

In [274]:
unique_id_user_meta = list(set(transaction.userId))
user_meta=user_meta.loc[user_meta.userId.isin(unique_id_user_meta),:]
user_meta = user_meta.reset_index(drop=True)
user_meta

Unnamed: 0,userId,size,bright,color,thickness
0,0,2.0,2.0,2.00,0.0
1,1,2.0,2.0,2.00,0.0
2,2,1.0,2.0,2.00,0.0
3,3,2.0,2.0,1.00,0.0
4,4,1.5,1.5,1.25,0.0
...,...,...,...,...,...
8292,8292,3.0,3.0,1.00,0.0
8293,8293,2.0,2.0,1.00,0.0
8294,8294,2.0,2.0,2.00,0.0
8295,8295,2.0,2.0,2.00,0.0


In [275]:
new_user = pd.DataFrame(user_meta.loc[0]).transpose()
new_user_idx = new_user.index
new_user.loc[new_user_idx, ('userId')] = 8299
new_user.loc[new_user_idx, ('size')] = 0.3
new_user.loc[new_user_idx, ('bright')] = 0.3
new_user.loc[new_user_idx, ('color')] = 0.4
new_user.loc[new_user_idx, ('thickness')] = 0
new_user = new_user[['userId',  'size', 'bright', 'color', 'thickness']]
new_user

Unnamed: 0,userId,size,bright,color,thickness
0,8299.0,0.3,0.3,0.4,0.0


In [276]:
from copy import deepcopy
sub_user = deepcopy(user_meta)

In [277]:
sub_user.size = sub_user.apply(lambda x: abs(x.size-new_user['size'].iloc[0]), axis=1)
sub_user.bright = sub_user.apply(lambda x: abs(x.bright-new_user['bright'].iloc[0]), axis=1)
sub_user.color = sub_user.apply(lambda x: abs(x.color-new_user['color'].iloc[0]), axis=1)
sub_user.thickness = sub_user.apply(lambda x: abs(x.thickness-new_user['thickness'].iloc[0]), axis=1)

In [278]:
sub_col = list(sub_user.columns)[1:]

In [279]:
sub_user.insert(1, 'favorSum', 0, True)
sub_user.favorSum = sum(sub_user[col_i] for col_i in sub_col)
sub_user = sub_user.sort_values('favorSum')

In [280]:
real_user = []
for i in range(3):
    real_user.append(sub_user.iloc[i].userId)
real_user = [int(i) for i in real_user]
print(real_user)

[6802, 1006, 5145]


In [281]:
cloth_meta = cloth_meta[style_cloth_column]

In [282]:
cloth_meta

Unnamed: 0,newClothId,fit,feeling,stretch,visibility,thickness
0,2018790,5,1,5,5,3
1,1844574,0,0,0,0,0
2,1672093,3,3,1,5,5
3,1687533,2,3,1,5,4
4,2338446,0,0,0,0,0
...,...,...,...,...,...,...
8871,21172562649,2,2,5,5,3
8872,21172562648,2,2,5,5,3
8873,23400702652,2,2,4,5,3
8874,23400702653,2,2,4,5,3


In [283]:
cloth_meta = pd.merge(cloth_meta, a, how="left", on="newClothId")

In [284]:
unique_cloth_id = list(transaction['newClothId'].unique())
len(unique_cloth_id)

2654

In [285]:
a = cloth_meta.newClothId.isin(unique_cloth_id)
cloth_meta = cloth_meta[a]
cloth_meta.index = range(len(cloth_meta))

In [286]:
cloth_meta.idx = cloth_meta.idx.astype('int')

In [287]:
transaction_source = [(transaction['userId'][i], transaction['idx'][i]) for i in range(transaction.shape[0])]

In [288]:
print('userId의 유니크 개수 :', len(transaction.userId.unique()))
print('clothId의 유니크 개수 :', len(transaction.idx.unique()))
print('user테이블의 userId의 유니크 개수 :', len(user_meta.userId.unique()))
print('cloth테이블의 clothId의 유니크 개수 :', len(cloth_meta.idx.unique()))

userId의 유니크 개수 : 8297
clothId의 유니크 개수 : 2654
user테이블의 userId의 유니크 개수 : 8297
cloth테이블의 clothId의 유니크 개수 : 2654


In [289]:
user_meta

Unnamed: 0,userId,size,bright,color,thickness
0,0,2.0,2.0,2.00,0.0
1,1,2.0,2.0,2.00,0.0
2,2,1.0,2.0,2.00,0.0
3,3,2.0,2.0,1.00,0.0
4,4,1.5,1.5,1.25,0.0
...,...,...,...,...,...
8292,8292,3.0,3.0,1.00,0.0
8293,8293,2.0,2.0,1.00,0.0
8294,8294,2.0,2.0,2.00,0.0
8295,8295,2.0,2.0,2.00,0.0


In [290]:
user_features_source = [(user_meta['userId'][i],
                        [user_meta['size'][i], user_meta['bright'][i], user_meta['color'][i],
                        user_meta['thickness'][i]]) for i in range(user_meta.shape[0])]

In [291]:
cloth_features_source = [(cloth_meta['idx'][i],
                        [cloth_meta['fit'][i], cloth_meta['feeling'][i], cloth_meta['stretch'][i],
                        cloth_meta['visibility'][i], cloth_meta['thickness'][i]]) for i in range(cloth_meta.shape[0])]

In [292]:
dataset = Dataset()
dataset.fit(users=transaction['userId'].unique(),
           items=transaction['idx'].unique(),
           user_features=user_meta[user_meta.columns[1:]].values.flatten(),
           item_features=cloth_meta[cloth_meta.columns[1:]].values.flatten())

In [293]:
interactions, weights = dataset.build_interactions(transaction_source)

In [294]:
user_features = dataset.build_user_features(user_features_source)

In [295]:
cloth_features = dataset.build_item_features(cloth_features_source)

In [296]:
train, test = random_train_test_split(interactions, test_percentage=0.2)
train, test = train.tocsr().tocoo(), test.tocsr().tocoo()
train_weights = train.multiply(weights).tocoo()
test_weights = test.multiply(weights).tocoo()

In [297]:
# no_components: 10
# learning_rate : 0.05
trials = Trials()
space = [hp.choice('no_components', range(10, 21, 5)),
         hp.uniform('learning_rate', 0.05, 0.1)]

In [298]:
# Define Objective Function
def objective(params):
    no_components, learning_rate = params
    model = LightFM(no_components=no_components,
                    learning_schedule='adagrad',
                    loss='warp',
                    learning_rate=learning_rate,
                    random_state=0)

    model.fit(interactions=train,
              item_features=cloth_features,
              user_features=user_features,
              sample_weight=train_weights,
              epochs=3,
              verbose=False)

    test_precision = precision_at_k(model, test, k=5, item_features=cloth_features, user_features=user_features).mean()
    print("no_comp: {}, lrn_rate: {:.5f}, precision: {:.5f}".format(
      no_components, learning_rate, test_precision))
    # test_auc = auc_score(model, test, item_features=item_features).mean()
    output = -test_precision

    if np.abs(output+1) < 0.01 or output < -1.0:
        output = 0.0

    return output

In [299]:
best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=10, trials=trials)

no_comp: 15, lrn_rate: 0.08926, precision: 0.01167    
no_comp: 20, lrn_rate: 0.06650, precision: 0.01218                               
no_comp: 20, lrn_rate: 0.09574, precision: 0.01095                                 
no_comp: 20, lrn_rate: 0.09329, precision: 0.01075                                 
no_comp: 15, lrn_rate: 0.05361, precision: 0.01085                                 
no_comp: 10, lrn_rate: 0.08258, precision: 0.01126                                 
no_comp: 10, lrn_rate: 0.05632, precision: 0.01177                                 
no_comp: 10, lrn_rate: 0.07900, precision: 0.01085                                 
no_comp: 10, lrn_rate: 0.08728, precision: 0.01085                                 
no_comp: 20, lrn_rate: 0.05193, precision: 0.01095                                 
100%|██████████| 10/10 [00:05<00:00,  1.81trial/s, best loss: -0.012180143035948277]


In [300]:
learning_rate = round(best_params['learning_rate'], 3)
no_components = best_params['no_components']
print(learning_rate, no_components)

0.067 2


In [301]:
model = LightFM(no_components=no_components,
                learning_schedule='adagrad',
                loss='warp',
                learning_rate=learning_rate,
                random_state=0)

model.fit(interactions=interactions,
          item_features=cloth_features,
          user_features=user_features,
          sample_weight=weights,
          epochs=10,
          verbose=True)

Epoch: 100%|██████████| 10/10 [00:00<00:00, 65.44it/s]


<lightfm.lightfm.LightFM at 0x1f365e4ff70>

In [302]:
# 모델 저장하기
import pickle
with open('model_style_skirt.pickle', 'wb') as fw:
    pickle.dump(model, fw)

In [303]:
# 모델 불러오기
import pickle
with open('model_style_skirt.pickle', 'rb') as f:
    model_skirt = pickle.load(f)

In [304]:
def sample_recommendation(model, idx, user_ids):
    cnt = 1
    for user_id in user_ids:
        if cnt==1:
            scores = model.predict(int(user_meta[user_meta.userId==user_id].index[0]), np.arange(len(cloth_meta)))
        else:
            scores += model.predict(int(user_meta[user_meta.userId==user_id].index[0]), np.arange(len(cloth_meta)))
    
    top_items = idx[np.argsort(-scores)]
    
    sub_clothes = []

    for x in top_items[:10]:
        sub_clothes.append(x)
    
    rec_clothes = []
    
    for i in sub_clothes:
        rec_clothes.append(cloth_meta[cloth_meta.idx==i].newClothId.iloc[0])
    
    return rec_clothes

In [305]:
rec_size_top = sample_recommendation(model_skirt, cloth_meta.idx, real_user)
rec_size_top

[150926840,
 2054486283,
 1907111403,
 117363976,
 2017517424,
 1773210461,
 1618698368,
 1248001459,
 1969751296,
 1882209439]

In [306]:
real_user

[6802, 1006, 5145]

In [307]:
cloth_meta.insert(7, 'what', 15, True)
user_meta.insert(5, 'what', 15, True)

In [308]:
cloth_meta = cloth_meta[["newClothId", "idx", 'what']]
cloth_meta

Unnamed: 0,newClothId,idx,what
0,161812522,1682,15
1,161812521,896,15
2,185310349,1662,15
3,1884867110,2304,15
4,1629303146,965,15
...,...,...,...
2649,21172562650,22,15
2650,21172562649,2,15
2651,21172562648,531,15
2652,23400702652,1441,15


In [309]:
cloth_meta.to_json('style_skirt_cloth_meta.json', orient='records', force_ascii=False)
user_meta.to_json('style_skirt_user_meta.json', orient='records', force_ascii=False)

In [310]:
user_meta

Unnamed: 0,userId,size,bright,color,thickness,what
0,0,2.0,2.0,2.00,0.0,15
1,1,2.0,2.0,2.00,0.0,15
2,2,1.0,2.0,2.00,0.0,15
3,3,2.0,2.0,1.00,0.0,15
4,4,1.5,1.5,1.25,0.0,15
...,...,...,...,...,...,...
8292,8292,3.0,3.0,1.00,0.0,15
8293,8293,2.0,2.0,1.00,0.0,15
8294,8294,2.0,2.0,2.00,0.0,15
8295,8295,2.0,2.0,2.00,0.0,15
