# Scaling Offers Metrics

### What this code do?
- scales the number of customers & transaction value metrics in the redeemer and non redeemer group by comparing transaction volume and amount to that of unmatched retailer transactions

### What assumptions do we have to make for offers metrics when scaling?
- the new/existing segment splits (percent of customers in each group) are consistent with Flux customers for the retailer
- the transaction frequency per redemption group (reddemer vs non_redeemer) is consistent with Flux (no scaler)
- the scaler on average transaction amount between flux and the redeemer is the same across redeemer groups

In [None]:
# install required packages
import pandas as pd
import numpy as np
from utils.database import query_from_file
import datetime
import time
from datetime import datetime, timedelta
path = '/Users/jennamiles/Documents/credentials'

In [None]:
#this is from database.py
#need to update in utils because of directory issue
import os
from psycopg2 import connect
from pandas import read_sql

def query(sql_statement: str, **kwargs):
    with open(f'{path}', 'r') as credentials:
        connection = connect(credentials.read())
        cursor = connection.cursor()

        if len(kwargs) > 0:
            dataframe = read_sql(sql_statement.format(**kwargs), connection)
        else:
            dataframe = read_sql(sql_statement, connection)

        assert len(dataframe) > 0, 'query returned no results'
        return dataframe
    
def query_from_file(sql_file: str, **kwargs):
    with open(sql_file, 'r') as f:
        return query(f.read(), **kwargs)

In [81]:
#define the retailer, the offer
retailer = 'pure'
offer_name = 'free coffee yellow'
#could use offer sku here instead of offer if easier
#may also want ot add in location id -> in this case makes the scalers by location too 
location_id = ''

sql_flux = 'pure_txns.sql'
sql_retailer = 'pure_all.sql'

In [110]:
#bring in the transactions for both flux and all transactions 
#read in pure data from SQL query:
start = time.time()
df = query_from_file(sql_flux)
df_all = query_from_file(sql_retailer)
end = time.time()
query_time = end-start
print(f"Query took {query_time:.2f} seconds")

Query took 313.14 seconds


In [112]:
#flag redemption transactions 
df['redemption_flag'] = np.where(df['name'].str.lower() == offer_name,1,0)
# need to match onto items if possible here
df_all['redemption_flag'] = np.where(df_all['name'].str.lower() == offer_name,1,0)

#flag the offer period and pre-period
df_all['date'] = df_all.adjusted_transaction_date.dt.date
df['date'] = df.transaction_date.dt.date

start = df_all[df_all['redemption_flag']==1].adjusted_transaction_date.min()
end = df_all[df_all['redemption_flag']==1].adjusted_transaction_date.max()
print(f'start date is {start}')
print(f'end date is {end}')

df_all['offer_period'] = np.where(((df_all['date']>= start) & (df_all['date']<= end)),1,0)
df_all['pre_offer_period'] = np.where((df_all['date']<= start),1,0)

df['offer_period'] = np.where(((df['date']>= start) & (df['date']<= end)),1,0)
df['pre_offer_period'] = np.where((df['date']<= start),1,0)

start date is 2019-11-27 07:04:06+00:00
end date is 2021-04-12 08:23:00+00:00


In [297]:
# customer scalers during offer period
# 1) Flux data
transactions_flux = df[df.offer_period == 1].nunique()['receipt_id']
redemptions_flux = df[(df.redemption_flag == 1) & (df.offer_period == 1)].nunique()['receipt_id']
redemption_receipts = df[(df.redemption_flag == 1) & (df.offer_period == 1)]['receipt_id'].unique()
non_redemptions_flux = df[(~df.receipt_id.isin(redemption_receipts)) & (df.offer_period == 1)].nunique()['receipt_id']
redeemers_flux = df[(df.redemption_flag == 1) & (df.offer_period == 1)].nunique()['customer_id']
redeemers = df[(df.redemption_flag == 1) & (df.offer_period == 1)]['customer_id'].unique()
non_redeemers_flux = df[(~df.customer_id.isin(redeemers)) & (df.offer_period == 1)].nunique()['customer_id']
customers_flux = df[df.offer_period == 1].nunique()['customer_id']
redeemers_receipts_flux = df[(df.offer_period == 1)&(df.customer_id.isin(redeemers))].nunique()['receipt_id']
non_redeemers_receipts_flux = df[(df.offer_period == 1)&(~df.customer_id.isin(redeemers))].nunique()['receipt_id']
# 2) Retailer data
transactions_retailer = df_all[df_all.offer_period == 1].nunique()['txn_id']
redemptions_retailer = df_all[(df_all.redemption_flag == 1) & (df_all.offer_period == 1)].nunique()['txn_id']
redemption_txns = df_all[(df_all.redemption_flag == 1) & (df_all.offer_period == 1)]['txn_id'].unique()
non_redemptions_retailer = df_all[(~df_all.txn_id.isin(redemption_txns)) & (df_all.offer_period == 1)].nunique()['txn_id']
# 3) Scalers
redemptions_scaler = redemptions_retailer/redemptions_flux
non_redemptions_scaler = non_redemptions_retailer/non_redemptions_flux
# 4) Retailer customer estimates
# multiplying by transactions scaler the same as estimating atf and diving transactions by that
est_redeemers_retailer = np.floor(redeemers_flux*redemptions_scaler)
est_non_redeemers_retailer = np.floor(non_redeemers_flux*non_redemptions_scaler)
est_customers = est_redeemers_retailer+est_non_redeemers_retailer #should be more accurate than having a scaler across all transactions
# 5) transaction value scalers - redeemer and non-redeemer group treated the same here - can't identify all redemeer transactions (includes non redemption redeemer transactions) for retailer
ATV_flux = df[df.offer_period == 1][['receipt_id','total_amount']].drop_duplicates()['total_amount'].agg({'mean','median'})
ATV_retailer = df_all[df_all.offer_period == 1][['txn_id','amount']].drop_duplicates()['amount'].agg({'mean','median'})
ATV_scalers = ATV_retailer/ATV_flux
mean_atv_scaler = ATV_scalers['mean']
median_atv_scaler = ATV_scalers['median']

In [298]:
print('Note:')
print('ATF is assumed to be the same across retailer as seen in flux per redemption groups')
print('This means the scaler for transactions is the same as for customers')
print('\n')
print('During the offer period:')
print(f'ATF redeemers is {redeemers_receipts_flux/redeemers_flux:.2f}')
print(f'ATF non-redeemers is {non_redeemers_receipts_flux/non_redeemers_flux:.2f}')
print(f'Redeemers and redemptions scaler is {redemptions_scaler:.2f}')
print(f'Non-redemptions and non-redeemers scaler is {non_redemptions_scaler:.2f}')
print(f'ATV scaler for mean values is {mean_atv_scaler:.4f}')
print(f'ATV scaler for median values is {median_atv_scaler:.4f}')
print(f'Total redemptions during offer period is {redemptions_retailer}')
print(f'Total transactions during offer period is {transactions_retailer}')
print(f'Total estimated redeemers during offer period is {est_redeemers_retailer:.0f}')
print(f'Total estimated customers during offer period is {est_customers:.0f}')

Note:
ATF is assumed to be the same across retailer as seen in flux per redemption groups
This means the scaler for transactions is the same as for customers


During the offer period:
ATF redeemers is 11.99
ATF non-redeemers is 3.05
Redeemers and redemptions scaler is 36.42
Non-redemptions and non-redeemers scaler is 53.62
ATV scaler for mean values is 1.0991
ATV scaler for median values is 1.0227
Total redemptions during offer period is 11217
Total transactions during offer period is 1329235
Total estimated redeemers during offer period is 5717
Total estimated customers during offer period is 410040
