# Simulate spending

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import lightgbm as lgb
from src.utils.simulate_spending import *

In [2]:
# Input tables used by the simulation below
features = pd.read_csv('./data/final/features_lgb.csv')
portfolio = pd.read_csv('./data/final/portfolio.csv')

# Previously trained LightGBM booster, restored from its saved model file
reg = lgb.Booster(model_file='./outputs/models/lgb.txt')

In [3]:
# Pull the customer features and the offer features out of the feature table,
# in that order, with one subset_features call per kind.
profile_features, offer_features = (
    subset_features(features, kind) for kind in ('customer', 'offer')
)

In [4]:
# Match offer with the extracted features
# NOTE(review): get_offer_labels is presumably provided by the star import from
# src.utils.simulate_spending; the result is later repeated with np.tile to label
# the simulated rows, so it is expected to be a sequence with one label per offer
# -- TODO confirm against the helper.
offer_labels = get_offer_labels(offer_features, portfolio)

In [5]:
# Build the simulated transactions from the customer and offer feature tables
# (presumably one row per customer/offer combination -- TODO confirm in the helper)
simulated_transactions = simulate_transactions(profile_features, offer_features)

# Score the simulated rows with the loaded model, then store the scores as a new column
predicted_spending = reg.predict(simulated_transactions)
simulated_transactions['predicted_spending'] = predicted_spending

In [6]:
# Label every simulated row with its offer by repeating offer_labels once per
# row of profile_features.
# NOTE(review): this assumes simulated_transactions is ordered customer by customer,
# with the offers cycling in the same order as offer_labels -- TODO confirm.
n_profiles = len(profile_features)
simulated_transactions['offer'] = np.tile(offer_labels, n_profiles)

In [7]:
def _best_offer(customer_rows):
    """Return the 'offer' value of the row with the highest predicted spending."""
    top_row = customer_rows['predicted_spending'].idxmax()
    return customer_rows.loc[top_row, 'offer']


# For each person, keep the offer whose predicted spending is the largest
optimum_offers = simulated_transactions.groupby('person').apply(_best_offer)

In [8]:
# Count how many customers were assigned each offer as their optimum
optimum_offers.value_counts()

0b1e1539f2cc45b7b9fa7c272da2e1d7    7860
ae264e3637204a6fb9bb56bc8210ddfd    6628
dtype: int64

It appears that the optimal offers were:

  - Buy one get one: the customer pays a minimum of 10 dollars and gets a free drink with a maximum value of 10 dollars (`ae264e3637204a6fb9bb56bc8210ddfd`)
  - Pay 20, get 25% off (`0b1e1539f2cc45b7b9fa7c272da2e1d7`)

with a slight preference for the discount one (7,860 vs. 6,628 customers). In addition, since `no_offer` did not show up in the optimum offer list, the model predicts that every customer spends more with an offer than without one, i.e. customers are stimulated by offers in general.
Now, let's see how the customers would spend with and without an offer, and compare the results.

In [9]:
# Simulate spending
# Both helpers take the same simulated_transactions frame (with the
# 'predicted_spending' and 'offer' columns added above).
# NOTE(review): judging by the names and the later comparison table, the first
# presumably extracts each customer's predicted spending without an offer and the
# second each customer's highest predicted spending -- TODO confirm in
# src.utils.simulate_spending.
no_offer_spending = get_spending_without_offer(simulated_transactions)
max_spending = get_max_spending(simulated_transactions)

In [10]:
# Put the no-offer spending and the best-offer spending side by side,
# then preview the first ten rows of the comparison
spending_comparison = compare_spendings(no_offer_spending, max_spending)
spending_comparison.iloc[:10]

Unnamed: 0,person,no_offer_spending,optimum_offer_spending,spending_increased,pct_change
0,6813,23.231156,25.091666,1.86051,8.008685
1,12815,18.295934,20.948339,2.652405,14.497235
2,3167,11.541955,14.073435,2.53148,21.932857
3,2611,3.556137,9.597052,6.040915,169.872937
4,9721,12.472459,14.760702,2.288243,18.346369
5,2796,19.55484,22.060166,2.505326,12.811795
6,4265,29.074745,31.191126,2.116381,7.279104
7,11066,19.481874,22.039774,2.5579,13.12964
8,12077,10.953477,13.847586,2.894109,26.421833
9,14033,15.856154,18.848653,2.9925,18.872796


In [12]:
# Average the absolute and the percentage spending change across customers
uplift_columns = ['spending_increased', 'pct_change']
spending_comparison[uplift_columns].mean()

spending_increased      4.248295
pct_change            105.751395
dtype: float64

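The results suggest that, by applying the above policy, predicted customer spending would increase by about \$4.25 per customer on average. The mean of the per-customer percentage changes is about 106%; because this is an average of individual ratios, it is pulled up by customers with low baseline spending (e.g. the customer above whose predicted spending rises from 3.56 to 9.60, a 170% change) and should not be read as total spending doubling.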

In [13]:
# Write the comparison table to disk without the index, then signal completion
comparison_path = './outputs/spending_comparison.csv'
spending_comparison.to_csv(comparison_path, index=False)
print('Done')

Done
