In [22]:
import jax
import numpy as np

import matplotlib.pyplot as plt
import pandas as pd

import torch
import torch.jit
import torch.distributions.normal as normal
import torch.distributions.uniform as uniform
import torch.distributions.beta as beta
from torch.utils.data import Dataset, DataLoader, ConcatDataset

# multi-armed bandit base algorithms
from base_algos_torch import action_value_init, action_value_update, beta_init, beta_update, simulate_salesdata

In [23]:
# Optimisation hyperparameters used by the training cells of this notebook.
EPOCHS = 500           # upper bound of the epoch loop over train_loader
BATCH_SIZE = 64        # batch_size handed to the DataLoader
LEARNING_RATE = 1e-3   # lr handed to the Adam optimizer

In [24]:
# Random seed to make our experiment replicable.
SEED = 142
# Actually apply the seed: torch's global RNG drives parameter initialisation
# and the DataLoader shuffle order, so without this call every
# "Restart & Run All" produces different numbers.
torch.manual_seed(SEED)

# Number of visitors we want to simulate
NUM_VISITS = 5000

# Candidate prices: from 0 (inclusive) to 1 (exclusive) in steps of 0.05.
PRICE_LIST = torch.arange(0, 1, 0.05)
# Two one-hot feature vectors, passed to simulate_salesdata as `features`.
FEATURE_SPACE = torch.tensor([[0, 1], [1, 0]], dtype=torch.float32)
# Passed to simulate_salesdata as `price_sensitivity_params`.
# NOTE(review): presumably one row per row of FEATURE_SPACE - confirm against
# base_algos_torch.simulate_salesdata.
PRICE_SENSITIVITY_PARMS = torch.tensor([[0.3, 0.05], [0.5, 0.1]], dtype=torch.float32)

In [25]:
# Simulate the sales log that the models below are trained on.
# Note: the sample count and the seed are literals here; they are independent
# of the NUM_VISITS and SEED constants defined in the configuration cell.
ds = simulate_salesdata(
    N=10_000,
    seed=12,
    features=FEATURE_SPACE,
    price_list=PRICE_LIST,
    price_sensitivity_params=PRICE_SENSITIVITY_PARMS,
)

In [26]:
ds.shape
# Rows 0-2 of `ds` become the three model inputs (one sample per row after the
# axis swap); row 3 becomes the target as a column vector.
X = ds[:3].permute((1, 0))
y = torch.unsqueeze(ds[3], dim=1)

In [6]:
## train data
class TrainData(Dataset):
    """Map-style dataset that pairs each feature entry with its label.

    Args:
        X_data: indexable collection of inputs; its length defines the
            length of the dataset.
        y_data: indexable collection of targets, addressed with the same
            index as ``X_data``.
    """

    def __init__(self, X_data, y_data):
        # Both containers are stored by reference; nothing is copied or checked.
        self.X_data, self.y_data = X_data, y_data

    def __len__(self):
        """Number of samples, taken from the inputs."""
        return len(self.X_data)

    def __getitem__(self, index):
        """Return the ``(input, target)`` pair stored at ``index``."""
        sample = self.X_data[index]
        label = self.y_data[index]
        return sample, label


# X and y are already tensors, so cast them with .float() instead of going
# through the legacy torch.FloatTensor(...) type constructor. .float() is
# equivalent to .to(torch.float32): it converts any other dtype and returns the
# tensor unchanged when it is already float32.
train_data = TrainData(X.float(), y.float())

In [7]:
# Mini-batch iterator over the training set; samples are drawn in shuffled order.
train_loader = DataLoader(train_data, shuffle=True, batch_size=BATCH_SIZE)

In [8]:
# build custom module for logistic regression
class LogisticRegression(torch.nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid.

    Args:
        n_inputs: size of each input sample.
        n_outputs: size of each output sample.
    """

    def __init__(self, n_inputs, n_outputs):
        super().__init__()
        self.linear = torch.nn.Linear(n_inputs, n_outputs)

    def forward(self, x):
        """Return sigmoid(linear(x)), i.e. values in (0, 1), not raw logits."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

In [9]:
# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [10]:
# Instantiate the model, the loss and the optimizer.
n_inputs = 3   # the three columns of X
n_outputs = 1  # a single sale probability
model = LogisticRegression(n_inputs, n_outputs)

model.to(device)
print(model)
# LogisticRegression.forward already applies a sigmoid, so the model emits
# probabilities. BCEWithLogitsLoss applies a sigmoid of its own and would
# squash the outputs a second time; BCELoss is the loss that takes
# probabilities directly.
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

LogisticRegression(
  (linear): Linear(in_features=3, out_features=1, bias=True)
)


In [None]:
# Mini-batch training: EPOCHS passes over train_loader, reporting the mean
# batch loss and the fraction of correctly classified samples per epoch.
model.train()
for e in range(1, EPOCHS+1):
    epoch_loss = 0
    n_correct = 0
    n_seen = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()

        y_pred = model(X_batch)

        loss = criterion(y_pred, y_batch)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        # Accuracy is computed inline: no `binary_acc` helper is defined or
        # imported in this notebook. The model outputs probabilities (sigmoid
        # in forward), so 0.5 is the decision threshold.
        # Assumes the labels are 0/1 indicators - TODO confirm.
        with torch.no_grad():
            hits = (y_pred >= 0.5) == (y_batch >= 0.5)
        n_correct += hits.sum().item()
        n_seen += hits.numel()

    # Count-based accuracy, so a smaller final batch is not over-weighted.
    epoch_acc = n_correct / max(n_seen, 1)
    print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc:.3f}')

In [37]:
def crossover_learning(X, y, price_list):
    """Expand one observed visit into one training row per implied price.

    A sale at the offered price is replicated for every listed price that is
    less than or equal to it; a non-sale is replicated for every listed price
    that is greater than or equal to it.

    Args:
        X: 1-D tensor whose first two entries are the features and whose
            third entry is the offered price.
        y: single-element tensor holding the outcome; it is truth-tested, so
            any non-zero value counts as "sold".
        price_list: 1-D tensor of candidate prices.

    Returns:
        Tuple ``(X_aug, y_aug)``: ``X_aug`` has shape ``(k, 3)`` (features
        followed by one relevant price per row) and ``y_aug`` has shape
        ``(k, 1)`` with the outcome repeated, where ``k`` is the number of
        relevant prices. ``k`` may be 0, in which case both tensors are empty.
    """
    features, price = X[:2], X[2]
    sold = y

    if sold:
        keep = price_list <= price
    else:
        keep = price_list >= price
    relevant_prices = price_list[keep]
    n_rows = relevant_prices.shape[0]

    # expand() broadcasts the feature row without building a Python list and,
    # unlike torch.stack on an empty list, also works when no price qualifies.
    stacked_features = features.unsqueeze(0).expand(n_rows, -1)
    X_aug = torch.cat((stacked_features, relevant_prices.unsqueeze(1)), dim=1)
    y_aug = sold.expand(n_rows, 1)

    return X_aug, y_aug

In [38]:
# Instantiate a fresh model, loss and optimizer for the crossover-learning run.
n_inputs = 3   # two feature entries plus the price
n_outputs = 1  # a single sale probability
model = LogisticRegression(n_inputs, n_outputs)

model.to(device)
print(model)
# LogisticRegression.forward already applies a sigmoid, so the model emits
# probabilities. BCEWithLogitsLoss applies a sigmoid of its own and would
# squash the outputs a second time; BCELoss is the loss that takes
# probabilities directly.
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

LogisticRegression(
  (linear): Linear(in_features=3, out_features=1, bias=True)
)


In [None]:
# Online training with crossover learning: each observed visit is expanded
# into a small batch (one row per implied price) and used for one optimizer step.
model.train()
# Start at index 0: range(1, len(X)) silently skipped the first observation.
for e in range(len(X)):
    X_co, y_co = crossover_learning(X[e], y[e], price_list = PRICE_LIST)

    # Nothing to learn from when no listed price is implied by this visit.
    if X_co.shape[0] == 0:
        continue

    X_batch, y_batch = X_co.to(device), y_co.to(device)
    optimizer.zero_grad()

    y_pred = model(X_batch)

    loss = criterion(y_pred, y_batch)

    loss.backward()
    optimizer.step()

    # Report this step's own loss. The previous version divided it by
    # len(train_loader), which belongs to the mini-batch experiment and has no
    # meaning here, and called a `binary_acc` helper that is not defined or
    # imported in this notebook (its result was never reported).
    # The counter stays 1-based, as in the mini-batch loop.
    print(f'Epoch {e+1:03}: | Loss: {loss.item():.5f}')