# Import package

In [29]:
import sys
sys.path.append('..')

import pydot
from src import envs

from tqdm import tqdm_notebook as tqdm
import pandas as pd
import requests
from keras.layers import (Concatenate, Dense, Dot, Dropout, Embedding, Flatten,
                          Input, Lambda)
from keras.layers.merge import concatenate, dot
import keras
# %load solutions/deep_implicit_feedback_recsys.py
from keras.models import Model, Sequential
from keras.regularizers import l2
import tensorflow as tf

from sklearn.metrics import roc_auc_score
from itertools import product
from time import sleep

from keras.utils import plot_model
import lightgbm as lgb

# Data

In [30]:
data_reset = envs.req_reset(verbose=True, envs='third')
original_df = pd.DataFrame({
    'action': data_reset['action_history'],
    'rewards': data_reset['rewards_history'],
    'state': data_reset['state_history']
})
nb_users, nb_items = data_reset['nb_users'], data_reset['nb_items']

------Summary-----
key:action_history, 	size:200, 	type:<class 'int'>
key:	nb_items, 	value:	30
key:	nb_users, 	value:	100
key:next_state, 	size:30, 	type:<class 'list'>
key:rewards_history, 	size:200, 	type:<class 'int'>
key:state_history, 	size:200, 	type:<class 'list'>


In [31]:
data_predict = envs.req_predict(predict=0, verbose=True, envs='third')

------Summary-----
key:reward,	value:0
key:state,	size:30,	type:<class 'list'>


In [32]:
def export_data():
    '''
        Export data from envs-3
        
        Returns:
            nb_users: number of users
            nb_items: number of items
            next_state: next state
            df : initial training set data frame
    
    '''
    
    
    data_reset = envs.req_reset(verbose=False, envs='third')
    original_data = []
    variables = []
    for idx in range(len(data_reset['action_history'])):
        action = data_reset['action_history'][idx]
        reward = data_reset['rewards_history'][idx]
        state = data_reset['state_history'][idx]
        
        for idx in range(len(state)):
            row = { 'user':state[idx][0],
                    'item':state[idx][1],
                    'price':state[idx][2],
                    'reward': reward if idx == action else 0,
                    'weight': 1 if idx == action else 0  
                }

            variables.append(state[idx][3:])
            original_data.append(row)

    variable_df = pd.DataFrame(variables, columns=['v0', 'v1', 'v2', 'v3', 'v4'])
    data_df = pd.DataFrame(original_data)
    variable_df.index = data_df.index 
    return data_reset['nb_users'], data_reset['nb_items'], data_reset['next_state'],pd.concat([data_df, variable_df], axis=1)
    

# Utils

In [33]:
def average_roc_auc(match_model, data_train, data_test):
    """Compute the ROC AUC for each user and average over users"""
    max_user = max(data_train['user'].max(), data_test['user'].max())
    max_item = max(data_train['item'].max(), data_test['item'].max())
    user_auc_scores = []
    for user in range(1, max_user + 1):
        pos_item_train = data_train[data_train['user'] == user]
        pos_item_test = data_test[data_test['user'] == user]
        
        # Consider all the items already seen in the training set
        all_items = np.arange(1, max_item + 1)
        items_to_rank = np.setdiff1d(all_items, pos_item_train['item'].values)
        
        # Ground truth: return 1 for each item positively present in the test set
        # and 0 otherwise.
        expected = np.in1d(items_to_rank, pos_item_test['item'].values)
        
        if np.sum(expected) >= 1:
            # At least one positive test value to rank
            repeated_user = np.empty_like(items_to_rank)
            repeated_user.fill(user)

            predicted = match_model.predict([repeated_user, items_to_rank],
                                            batch_size=4096)
            user_auc_scores.append(roc_auc_score(expected, predicted))

    return sum(user_auc_scores) / len(user_auc_scores)

def sample_triplets(pos_data, random_seed=0):
    """Sample negatives at random"""
    rng = np.random.RandomState(random_seed)
    users = pos_data['user'].values
    pos_items = pos_data[pos_data.reward>0]['item'].values

    neg_items = pos_data[pos_data.reward==0]['item'].values

    return [users, pos_items, neg_items]

def identity_loss(y_true, y_pred):
    """Ignore y_true and return the mean of y_pred
    
    This is a hack to work-around the design of the Keras API that is
    not really suited to train networks with a triplet loss by default.
    """
    return tf.reduce_mean(y_pred + 0 * y_true)


def margin_comparator_loss(inputs, margin=1.):
    """Comparator loss for a pair of precomputed similarities
    
    If the inputs are cosine similarities, they each have range in
    (-1, 1), therefore their difference have range in (-2, 2). Using
    a margin of 1. can therefore make sense.

    If the input similarities are not normalized, it can be beneficial
    to use larger values for the margin of the comparator loss.
    """
    positive_pair_sim, negative_pair_sim = inputs
    return tf.maximum(negative_pair_sim - positive_pair_sim + margin, 0)

def make_interaction_mlp(input_dim, n_hidden=1, hidden_size=64,
                         dropout=0, l2_reg=None):
    """Build the shared multi layer perceptron"""
    mlp = Sequential()
    if n_hidden == 0:
        # Plug the output unit directly: this is a simple
        # linear regression model. Not dropout required.
        mlp.add(Dense(1, input_dim=input_dim,
                      activation='relu', kernel_regularizer=l2_reg))
    else:
        mlp.add(Dense(hidden_size, input_dim=input_dim,
                      activation='relu', kernel_regularizer=l2_reg))
        mlp.add(Dropout(dropout))
        for i in range(n_hidden - 1):
            mlp.add(Dense(hidden_size, activation='relu',
                          W_regularizer=l2_reg))
            mlp.add(Dropout(dropout))
        mlp.add(Dense(1, activation='relu', kernel_regularizer=l2_reg))
    return mlp


# Model

In [81]:
def test_agent(agent, episodes=1000,  epochs=3, online=False):
    '''
    Param:
        agent : agent use to be test
        episodes : number to test
        
    Returns:
        rewards : rewards hist
        takes : actions hist
    '''
    
    total_rewards, total_takes = [], []
    for _ in range(epochs):
        nb_users, nb_items, next_state, hist_df = export_data()
        agent.reset_state(nb_users, nb_items, hist_df)

        rewards = []
        takes = []

        for i in tqdm(range(episodes)):
            sleep(0.01)
            action = agent.predict(next_state)
            d = envs.req_predict(0, envs='third')
            # error for last prediction
            rewards.append(d['reward'])
            takes.append(d['reward'] >0)
            # predict next

            if online:
                agent.update(next_state,  action, d['reward'])

            next_state = d['state'] 


        print(f'Take Rates:{sum(takes)/len(takes)}, Mean Reawards:{sum(rewards) / len(rewards)}')
        total_rewards.extend(rewards), total_takes.extend(takes)
    print(f'Total Take Rates:{sum(total_takes)/len(total_takes)}, Total Mean Reawards:{sum(total_rewards) / len(total_rewards)}')
    return total_rewards, total_takes

## Model 1- baseline

In [35]:
class BaseAgent(object):
    
    def __init__(self, name='baselien'):
        self.name = name
    
    
    def reset_state(self, nb_users, nb_items, hist_df):
        '''
        Reset agent's model and fit model with data specified by args
        
        Params:
            data : training data used to train data
        '''
        pass
    
    def predict(self, state):
        '''
        Returns action by state
        '''
        state = np.array(state)
        return np.argmax(state[:,2])
    
    def update(self, state, action ,reward):
        '''
        Update model with new data.
        This function is used for online model
        '''
        pass

In [52]:
class CheapestAgent(BaseAgent):

    def predict(self, state):
        '''
        Returns action by state
        '''
        state = np.array(state)
        return np.argmin(state[:,2])

In [47]:
class ConstantAgent(BaseAgent):

    def predict(self, state):
        '''
        Returns action by state
        '''
        return 0

In [45]:
class RandomAgent(BaseAgent):

    def predict(self, state):
        '''
        Returns action by state
        '''
        return np.random.randint(0, len(state))

### Test

In [77]:
agent1 = BaseAgent()
reawrds, takes = test_agent(agent1, 1000, epochs=3)

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.176, Mean Reawards:123.73870656619684


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.252, Mean Reawards:81.46846021022411


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.261, Mean Reawards:148.94215353336074
Total Take Rates:0.22966666666666666, Total Mean Reawards:118.04977343659473


In [48]:
agent2 = ConstantAgent()
reawrds, takes = test_agent(agent2, 1000, epochs=3)

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.337, Mean Reawards:202.64106557681447


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.255, Mean Reawards:107.58912864876962


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.265, Mean Reawards:147.5803381236969
Total Take Rates:0.2856666666666667, Total Mean Reawards:152.60351078309426


In [46]:
agent3 = RandomAgent()
reawrds, takes = test_agent(agent3, 1000, epochs=3)

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.235, Mean Reawards:135.89172689661766


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.304, Mean Reawards:120.9870440221462


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.222, Mean Reawards:144.20706655183284
Total Take Rates:0.25366666666666665, Total Mean Reawards:133.69527915686444


In [54]:
agent4 = CheapestAgent()
reawrds, takes = test_agent(agent4, 1000, epochs=3)

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.128, Mean Reawards:96.5379676655509


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.227, Mean Reawards:85.681370530848


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.268, Mean Reawards:130.9737983406737
Total Take Rates:0.20766666666666667, Total Mean Reawards:104.39771217902408


## Model-2

In [62]:
class Agent1(BaseAgent):
    
    def __init__(self, epochs=50, model_params =None, verbose=False ):
        self.epochs = epochs
        self.verbose= verbose
        if model_params == None:
            self.model_params = dict(
                user_dim=32,
                item_dim=64,
                n_hidden=1,
                hidden_size=128,
                dropout=0.1,
                l2_reg=0
            )
        else:
            self.model_params = model_params

    
         
    def reset_state(self, nb_users, nb_items, hist_df, epoch=10):
        ## reset data
        self.nb_users, self.nb_items, self.hist_df = nb_users, nb_items, hist_df        
        
        ## extrain triple data
        triplet_inputs = self._data_preprocessing(self.hist_df)
        print('Data Size:', len(triplet_inputs[0]))
        
        ## reset model
        self.model, self.match_model = self._build_model(self.nb_users, self.nb_items, **self.model_params)
        self.compile()
        
        ## Training model with new data
        self.fit(triplet_inputs, epochs = self.epochs, verbose=self.verbose)
        
        
    def fit(self, triplet_inputs,batch_size=64, epochs=10, verbose=True):
        fake_y = np.ones_like( triplet_inputs[0])
        self.model.fit(triplet_inputs, fake_y, shuffle=True,validation_split=0.1,
                       batch_size=batch_size, epochs=epochs, verbose =verbose)

        
    def predict(self, state):
        return np.argmax(self.predict_prob(state))
    
    
    def predict_prob(self, state):
        state = np.array(state, dtype=int)
        user_ids = state[:,0].ravel()
        item_ids = state[:,1].ravel()
        return self.match_model.predict( [user_ids, item_ids])
    
    
    
    def update(self, state, action ,reward):
        '''
        Update model with new data.
        This function is used for online model
        '''
        user_id = state[action][0]
        item_id = state[action][1]

        row = { 'user':state[action][0],
                    'item':state[action][1],
                    'price':state[action][2],
                    'reward': reward,
                    'weight': 1,
                    'v0':state[action][3],
                    'v1':state[action][4],
                    'v2':state[action][5],
                    'v3':state[action][6],
                    'v4':state[action][7],
                 }
        
        try:
            self.hist_df = self.hist_df.append(row, ignore_index=True)
            triplet_inputs = self._data_preprocessing(self.hist_df[self.hist_df.user == user_id])
            
            if len(triplet_inputs[0]) == 0:
                return
            self.fit(triplet_inputs, epochs = 1, verbose=self.verbose)
        except Exception as e:
            print('user_id:', user_id)
            print(state)
            print(triplet_inputs)
            raise Exception(e)

            
            
        
    def _build_model(self, n_users=32, n_items=64, user_dim=32, item_dim=64,
                     n_hidden=3, hidden_size=64, dropout=0, l2_reg=0):
        
        user_input = Input((1,), name='user_input')
        positive_item_input = Input((1,), name='positive_item_input')
        negative_item_input = Input((1,), name='negative_item_input')

        # - embeddings
        l2_reg = None if l2_reg == 0 else l2(l2_reg)
        user_layer = Embedding(n_users, user_dim, input_length=1,
                               name='user_embedding', embeddings_regularizer=l2_reg)
        user_embedding = Flatten()(user_layer(user_input))

        item_layer = Embedding(n_items, item_dim, input_length=1,
                               name="item_embedding", embeddings_regularizer=l2_reg)

        positive_item_embedding = Flatten()(item_layer(positive_item_input))
        negative_item_embedding = Flatten()(item_layer(negative_item_input))

        positive_embeddings_pair = concatenate([user_embedding, positive_item_embedding],
                                         name="positive_embeddings_pair")
        positive_embeddings_pair = Dropout(dropout)(positive_embeddings_pair)

        negative_embeddings_pair = concatenate([user_embedding, negative_item_embedding],

                                         name="negative_embeddings_pair")
        negative_embeddings_pair = Dropout(dropout)(negative_embeddings_pair)


        interaction_layers = make_interaction_mlp(
            user_dim + item_dim, n_hidden=n_hidden, hidden_size=hidden_size,
            dropout=dropout, l2_reg=l2_reg)

        positive_mlp = interaction_layers(positive_embeddings_pair)
        negative_mlp = interaction_layers(negative_embeddings_pair)

        # The triplet network model, only used for training
        triplet_loss = Lambda(margin_comparator_loss,
                              name='comparator_loss',
                              output_shape=(1,))(
            [positive_mlp, negative_mlp])
        
        match_model = Model(inputs=[user_input, positive_item_input],
                    outputs=positive_mlp)

        model = Model(inputs=[user_input,
                                      positive_item_input,
                                      negative_item_input],
                              outputs=triplet_loss)

        return model, match_model
     
    
    def _data_preprocessing(self, df):
        '''
        Extrait training data
        '''
        grouped = df.groupby(by='user')
        all_items = set(range(nb_items))

        users, pos_items, neg_items = [], [], []
        for idx, ele in grouped:
            user_id = idx
            pos_ids = ele[ele.reward>0]['item'].values
            neg_ids = ele[ele.reward == 0]['item'].values
            product_list = list(zip(*list(product([user_id], pos_ids, neg_ids))))
            
            if len(product_list) == 0:
                continue
#             print(idx, pos_ids.shape, neg_ids.shape, len(product_list[0]))
            users.extend(product_list[0])
            pos_items.extend(product_list[1])
            neg_items.extend(product_list[2])
        
#         print(len(users))
        assert len(users) == len(pos_items)
        assert len(users) == len(neg_items)
        return [users, pos_items, neg_items]
    
    

    def compile(self, loss=identity_loss, optimizer='adam'):
        self.model.compile(loss=loss, optimizer=optimizer)

### Test

In [63]:
agent11 = Agent1(epochs=50, verbose=False)
rewards, takes = test_agent(agent11, 1000, 3)

Data Size: 5208


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.254, Mean Reawards:161.05139420710714
Data Size: 4883


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.255, Mean Reawards:139.30751076420455
Data Size: 5109


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.326, Mean Reawards:141.90920049185283
Total Take Rates:0.2783333333333333, Total Mean Reawards:147.4227018210556


In [64]:
agent12 = Agent1(epochs=10, verbose=False)
rewards, takes = test_agent(agent12, 1000, 3, online=True)

Data Size: 5275


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.317, Mean Reawards:142.67439069934554
Data Size: 5076


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.224, Mean Reawards:100.8706587682458
Data Size: 5049


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.235, Mean Reawards:88.9842308582559
Total Take Rates:0.25866666666666666, Total Mean Reawards:110.84309344194986


## Model-3

In [65]:
class Agent2(BaseAgent):
    
    def __init__(self, epochs=50, model_params =None, verbose=False ):
        self.epochs = epochs
        self.verbose= verbose
        if model_params == None:
            self.model_params = dict(
                user_dim=32,
                item_dim=64,
                n_hidden=1,
                hidden_size=128,
                dropout=0.1,
                l2_reg=0
            )
        else:
            self.model_params = model_params

    
         
    def reset_state(self, nb_users, nb_items, hist_df, epoch=10):
        ## reset data
        self.nb_users, self.nb_items, self.hist_df = nb_users, nb_items, hist_df        
        
        ## extrain triple data
        print('Extract Data ......')
        triplet_inputs = self._data_preprocessing(self.hist_df)
        print('Data Size:', len(triplet_inputs[0]))
        
        ## reset model
        print('Create Model ......')
        self.model, self.match_model = self._build_model(self.nb_users, self.nb_items, **self.model_params)
        self.compile()
        
        ## Training model with new data
        print('Fit Model ......')
        self.fit(triplet_inputs, epochs = self.epochs, verbose=self.verbose)
        
        
    def fit(self, triplet_inputs,batch_size=64, epochs=10, verbose=True):
        fake_y = np.ones_like( triplet_inputs[0])
        self.model.fit(triplet_inputs, fake_y, shuffle=True,
                       batch_size=batch_size, epochs=epochs, verbose=verbose)

        
    def predict(self, state):
        return np.argmax(self.predict_prob(state))
    
    
    def predict_prob(self, state):
        state = np.array(state, dtype=int)
        user_ids = state[:,0].ravel()
        item_ids = state[:,1].ravel()
        meta_vec = state[:,3:]
        
        return self.match_model.predict( [user_ids, item_ids, meta_vec])
    
    
    
    def update(self, state, action ,reward):
        '''
        Update model with new data.
        This function is used for online model
        '''
        user_id = state[action][0]
        item_id = state[action][1]

        row = { 'user':state[action][0],
                    'item':state[action][1],
                    'price':state[action][2],
                    'reward': reward,
                    'weight': 1,
                    'v0':state[action][3],
                    'v1':state[action][4],
                    'v2':state[action][5],
                    'v3':state[action][6],
                    'v4':state[action][7],
                 }
        try:
            self.hist_df = self.hist_df.append(row, ignore_index=True)
            triplet_inputs = self._data_preprocessing(self.hist_df[self.hist_df.user == user_id])
            
            if triplet_inputs == None or len(triplet_inputs[0]) == 0:
                return
            self.fit(triplet_inputs, epochs = 1, verbose=self.verbose)
        except Exception as e:
            print(('user_id:', user_id))
            print(state)
            print(triplet_inputs)
            raise Exception(e)

            
            
        
    def _build_model(self, n_users=32, n_items=64, user_dim=32, item_dim=64,
                     n_hidden=3, hidden_size=64, dropout=0, l2_reg=0):
        
        user_input = Input((1,), name='user_input')
        positive_item_input = Input((1,), name='positive_item_input')
        positive_meta_input = Input((5,), name='positive_meta_item')
        negative_item_input = Input((1,), name='negative_item_input')
        negative_meta_input = Input((5,), name='negative_meta_input')
        

        # - embeddings
        l2_reg = None if l2_reg == 0 else l2(l2_reg)
        user_layer = Embedding(n_users, user_dim, input_length=1,
                               name='user_embedding', embeddings_regularizer=l2_reg)
        user_embedding = Flatten()(user_layer(user_input))

        item_layer = Embedding(n_items, item_dim, input_length=1,
                               name="item_embedding", embeddings_regularizer=l2_reg)

        positive_item_embedding = Flatten()(item_layer(positive_item_input))
        negative_item_embedding = Flatten()(item_layer(negative_item_input))

        positive_embeddings_pair = concatenate([user_embedding, positive_item_embedding, positive_meta_input],
                                         name="positive_embeddings_pair")
        positive_embeddings_pair = Dropout(dropout)(positive_embeddings_pair)

        negative_embeddings_pair = concatenate([user_embedding, negative_item_embedding, negative_meta_input],

                                         name="negative_embeddings_pair")
        negative_embeddings_pair = Dropout(dropout)(negative_embeddings_pair)


        interaction_layers = make_interaction_mlp(
            user_dim + item_dim + 5, n_hidden=n_hidden, hidden_size=hidden_size,
            dropout=dropout, l2_reg=l2_reg)

        positive_mlp = interaction_layers(positive_embeddings_pair)
        negative_mlp = interaction_layers(negative_embeddings_pair)

        # The triplet network model, only used for training
        triplet_loss = Lambda(margin_comparator_loss,
                              name='comparator_loss',
                              output_shape=(1,))(
            [positive_mlp, negative_mlp])
        
        match_model = Model(inputs=[user_input, positive_item_input, positive_meta_input],
                    outputs=positive_mlp)

        model = Model(inputs=[user_input,
                              positive_item_input,
                              positive_meta_input,
                              negative_item_input,
                              negative_meta_input
                             ],
                              outputs=triplet_loss)

        return model, match_model
     
    
    def _data_preprocessing(self, df):
        '''
        Extrait training data
        
        '''
        pos_grouped = df.groupby(by='user')
        all_items = set(range(self.nb_items))
    
        users, pos_items,pos_meta, neg_items, neg_meta = [], [], [],[], []
        for idx, ele in pos_grouped:
            pos_rows = ele[ele['reward']>0].values
            neg_rows = ele[ele['reward'] == 0.0].values
            product_list = list(zip(*list(product(pos_rows, neg_rows))))
        #     pos_id = .extend(product_list[1])
            if len(product_list) == 0:
                continue
#             print(len(product_list))
            pos_rows = np.array(product_list[0])
            neg_rows = np.array(product_list[1])
#             print(pos_rows.shape)
           
#             pos_rows = product_list[1]
            users.append(pos_rows[:,0])
            pos_items.append(pos_rows[:,1])
            pos_meta.append(pos_rows[:,-5:])
            neg_items.append(neg_rows[:,1])
            neg_meta.append(neg_rows[:,-5:])
#             print((idx, pos_rows[:,0].shape, pos_rows[:,1].shape, pos_rows[:,-5:].shape, neg_rows[:,1].shape, neg_rows[:,-5:].shape))

#             break
#             for ele in product_list[0]:
#                 pos_items.append(ele[0])
#                 pos_meta.append(ele[1:])
#                 users.append(idx)

#             for ele in product_list[1]:
#                 neg_items.append(ele[0])
#                 neg_meta.append(ele[1:])
                
            assert len(users) == len(pos_items)
            assert len(users) == len(pos_meta)
            assert len(users) == len(neg_items)
            assert len(users) == len(pos_meta)
            
        if len(users) == 0:
            return None
        return [np.concatenate(users), np.concatenate(pos_items),np.vstack(pos_meta), np.concatenate(neg_items), np.vstack(neg_meta)]
            
#         return [users, pos_items, pos_meta,  neg_items, neg_meta]
    

    def compile(self, loss=identity_loss, optimizer='adam'):
        self.model.compile(loss=loss, optimizer=optimizer)

### Test

In [80]:
agent21 = Agent2(epochs=10, verbose=False)
rewards, takes = test_agent(agent21, 1000, 3)

Extract Data ......
Data Size: 5182
Create Model ......
Fit Model ......


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.305, Mean Reawards:181.71171984300446
Extract Data ......
Data Size: 4397
Create Model ......
Fit Model ......


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.221, Mean Reawards:88.06000632080084
Extract Data ......
Data Size: 4421
Create Model ......
Fit Model ......


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.231, Mean Reawards:85.18091468261682
Total Take Rates:0.25233333333333335, Total Mean Reawards:118.31754694880881


In [67]:
agent21 = Agent2(epochs=10, verbose=False)
rewards, takes = test_agent(agent21, 1000, 3, online=True)

Extract Data ......
Data Size: 5034
Create Model ......
Fit Model ......


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.19, Mean Reawards:73.71102320399791
Extract Data ......
Data Size: 3142
Create Model ......
Fit Model ......


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.269, Mean Reawards:111.4757047063834
Extract Data ......
Data Size: 4535
Create Model ......
Fit Model ......


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.287, Mean Reawards:118.18049326185815
Total Take Rates:0.24866666666666667, Total Mean Reawards:101.12240705741284


## Model-4 Random Forest

In [72]:
class Agent3(BaseAgent):
    '''
    LightBM
    '''
    
            
    def __init__(self, epochs=50, model_params =None,verbose=False ):
        '''
            Set model
        '''
        self.num_round = epochs
        self.params = {'num_leaves':31, 'num_trees':100, 'objective':'binary'}
        self.params['metric'] = 'auc'
        if model_params:
            self.params.update(model_params)
                 
    def reset_state(self, nb_users, nb_items, hist_df):
        ## reset data
        self.nb_users, self.nb_items, self.hist_df = nb_users, nb_items, hist_df        
        hist_df['action'] = hist_df.reward > 0
        
        ## extrain triple data
        print('Extract Data ......')
        train_data = self. _data_preprocessing(self.hist_df)
        
        ## Training model with new data
        print('Fit Model ......')
        
        self.model = self.fit(train_data, self.params)
        
    
    def fit(self, train_data ,params = None, verbose=True):
        if params:
            self.params.update(params)
        self.model =  lgb.train(self.params, train_data, self.num_round)
        return self.model

        
    def predict(self, state):
        return np.argmax(self.predict_prob(state))
    
    
    def predict_prob(self, state):
        state = np.array(state, dtype=int)
        return self.model.predict( state)
    
    
    def update(self, state, action ,reward):
        '''
        Update model with new data.
        This function is used for online model
        '''
        user_id = state[action][0]
        item_id = state[action][1]

        row = { 'user':state[action][0],
                    'item':state[action][1],
                    'price':state[action][2],
                    'reward': reward,
                    'weight': 1,
                    'v0':state[action][3],
                    'v1':state[action][4],
                    'v2':state[action][5],
                    'v3':state[action][6],
                    'v4':state[action][7],
                 }
        try:
            self.hist_df = self.hist_df.append(row, ignore_index=True)
            train_data = self._data_preprocessing(self.hist_df)
            self.fit(train_data, self.params)
            
        except Exception as e:
            print(('user_id:', user_id))
            print(state)
            print(triplet_inputs)
            raise Exception(e)
    
            
    def _data_preprocessing(self, hist_df):
        '''
        Transformation orignal data to trainable form
        '''
        X_train = hist_df[ ['user', 'item', 'price','v0', 'v1', 'v2', 'v3', 'v4']]
        y_train = hist_df['action']
        train_data = lgb.Dataset(X_train, label=y_train)
        
        return train_data
        
        
        
    def _build_model(self,**argument):
                
        return lgb.LGBMClassifier(**argument)
        

###  Test

In [73]:
agent31 = Agent3(epochs=10, verbose=False)
rewards, takes = test_agent(agent31, 1000, 3, online=False)

Extract Data ......
Fit Model ......


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.22, Mean Reawards:118.64634969642262
Extract Data ......
Fit Model ......


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.269, Mean Reawards:134.11473711822467
Extract Data ......
Fit Model ......


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.305, Mean Reawards:168.64493057980096
Total Take Rates:0.26466666666666666, Total Mean Reawards:140.46867246481634


In [75]:
agent31 = Agent3(epochs=10, verbose=False)
rewards, takes = test_agent(agent31, 1000, 3, online=True)

Extract Data ......
Fit Model ......


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.314, Mean Reawards:174.89410351794143
Extract Data ......
Fit Model ......


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.251, Mean Reawards:131.26315718943735
Extract Data ......
Fit Model ......


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Take Rates:0.293, Mean Reawards:187.5031038381028
Total Take Rates:0.286, Total Mean Reawards:164.55345484849335
