# Prediction of Coupon Redemption

## dependencies

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date

# Cap the number of rows pandas prints when displaying a frame.
pd.options.display.max_rows = 20

## data loading

In [4]:
# Read the three competition CSV files: offline training records, online
# training records, and the offline test set to predict on.
offline_train, online_train, offline_test = map(
    pd.read_csv,
    (
        "dataset/ccf_offline_stage1_train.csv",
        "dataset/ccf_online_stage1_train.csv",
        "dataset/ccf_offline_stage1_test_revised.csv",
    ),
)

In [5]:
# Preview the first five offline training rows.
offline_train.head(n=5)

Unnamed: 0,User_id,Merchant_id,Coupon_id,Discount_rate,Distance,Date_received,Date
0,1439408,2632,,,0.0,,20160217.0
1,1439408,4663,11002.0,150:20,1.0,20160528.0,
2,1439408,2632,8591.0,20:1,0.0,20160217.0,
3,1439408,2632,1078.0,20:1,0.0,20160319.0,
4,1439408,2632,8591.0,20:1,0.0,20160613.0,


In [6]:
# Preview the first five online training rows.
online_train.head(n=5)

Unnamed: 0,User_id,Merchant_id,Action,Coupon_id,Discount_rate,Date_received,Date
0,13740231,18907,2,100017492.0,500:50,20160513.0,
1,13740231,34805,1,,,,20160321.0
2,14336199,18907,0,,,,20160618.0
3,14336199,18907,0,,,,20160618.0
4,14336199,18907,0,,,,20160618.0


In [7]:
# Preview the first five offline test rows.
offline_test.head(n=5)

Unnamed: 0,User_id,Merchant_id,Coupon_id,Discount_rate,Distance,Date_received
0,4129537,450,9983,30:5,1.0,20160712
1,6949378,1300,3429,30:5,,20160706
2,2166529,7113,6928,200:20,5.0,20160727
3,2166529,7113,1808,100:10,5.0,20160727
4,6172162,7605,6500,30:1,2.0,20160708


## feature engineering

### feature functions

In [62]:
# Classify the textual form of a Discount_rate entry.
def get_discount_type(s) -> str:
    """Return the discount category encoded by *s*.

    The value is converted with ``str`` before it is inspected, so
    non-string inputs are classified by their string form.

    Returns:
        ``"reduction"`` if the text contains ``":"``; otherwise ``"rate"``
        if it contains ``"."``; otherwise ``"fixed"`` if it equals
        ``"fixed"``; otherwise ``None``.
    """
    text = str(s)
    if ":" in text:
        return "reduction"
    if "." in text:
        return "rate"
    return "fixed" if text == "fixed" else None
    
# get uniform discount rate
def get_uniform_discount_rate(s) -> float:
    """Express a Discount_rate entry as a single multiplicative rate.

    Args:
        s: A Discount_rate entry. Its ``str`` form is inspected.

    Returns:
        * ``1 - reduction / threshold`` when the text has the form
          ``"threshold:reduction"`` (e.g. ``"200:20"`` gives ``0.9``);
        * ``float(s)`` when the text contains ``"."`` but no ``":"``;
        * ``None`` for anything else (e.g. NaN or ``"fixed"``).

    Raises:
        ValueError: If a part that should be numeric cannot be parsed.
        ZeroDivisionError: If the threshold part is zero.
    """
    text = str(s)
    # Test ":" before "." (the same order get_discount_type uses) so that a
    # threshold entry with a decimal part such as "100.0:10" is handled as a
    # reduction instead of being passed whole to float().
    if ":" in text:
        parts = text.split(":")
        return float(1 - float(parts[1]) / float(parts[0]))
    if "." in text:
        return float(s)
    return None

# Spending threshold of a "threshold:reduction" coupon.
def get_promotion_condition(s) -> int:
    """Return the integer that precedes the first ``":"`` in *s*.

    Returns:
        ``None`` when ``pd.isna(s)`` is true; the integer before the first
        ``":"`` when the string form of *s* contains one; ``-1`` otherwise.
    """
    if pd.isna(s):
        return None
    head, colon, _ = str(s).partition(":")
    return int(head) if colon else -1
    
# Reduction amount of a "threshold:reduction" coupon.
def get_promotion_amount(s) -> int:
    """Return the integer that follows the first ``":"`` in *s*.

    Returns:
        ``None`` when ``pd.isna(s)`` is true; the integer in the second
        ``":"``-separated field when the string form of *s* contains a
        ``":"``; ``-1`` otherwise.
    """
    if pd.isna(s):
        return None
    fields = str(s).split(":")
    # More than one field means the text contained at least one ":".
    if len(fields) > 1:
        return int(fields[1])
    return -1

# get date gap
def _yyyymmdd_to_date(value) -> date:
    """Turn a YYYYMMDD-encoded number (e.g. ``20160217.0``) into a ``date``."""
    stamp = int(float(value))
    return date(stamp // 10000, stamp // 100 % 100, stamp % 100)


def get_date_diff(d1, d2) -> int:
    """Return the number of calendar days from *d1* to *d2*.

    Both arguments are dates encoded as YYYYMMDD numbers, the way the
    ``Date_received`` and ``Date`` columns appear in the loaded frames
    (e.g. ``20160528.0``). Subtracting the raw numbers is only correct
    inside a single month (20160528 -> 20160601 would give 73, not 4), so
    the values are converted to ``datetime.date`` before subtracting.

    Args:
        d1: Start date as a YYYYMMDD number, or a missing value.
        d2: End date as a YYYYMMDD number, or a missing value.

    Returns:
        ``d2 - d1`` in whole days (negative if *d2* precedes *d1*), or
        ``None`` when either argument is missing (NaN or ``None``).

    Raises:
        ValueError: If an argument does not encode a valid calendar date.
    """
    # pd.isna also accepts None, which np.isnan would reject with TypeError.
    if pd.isna(d1) or pd.isna(d2):
        return None
    return (_yyyymmdd_to_date(d2) - _yyyymmdd_to_date(d1)).days

# Label: was the coupon used within 15 days of being received?
def get_label(d1, d2) -> bool:
    """Return whether the gap from *d1* to *d2* is at most 15.

    The gap is whatever ``get_date_diff(d1, d2)`` reports.

    Returns:
        ``True`` or ``False`` when a gap is available, ``None`` when
        ``get_date_diff`` yields a missing value.
    """
    days_elapsed = get_date_diff(d1, d2)
    if pd.isna(days_elapsed):
        return None
    return days_elapsed <= 15

def add_feature(df):
    """Return a copy of *df* extended with the derived feature columns.

    The input frame is left untouched. Four columns are derived from
    ``Discount_rate`` and one, ``date_gap``, from the ``Date_received`` and
    ``Date`` columns, so *df* must contain all three of those columns.
    """
    enriched = df.copy()

    # Column name -> per-value extractor applied to 'Discount_rate'.
    # NOTE: 'promotion_amonut' is misspelled; the key is kept as-is because
    # code outside this function may look the column up by that exact name.
    discount_features = {
        'discount_type': get_discount_type,
        'uniform_discount_rate': get_uniform_discount_rate,
        'promotion_condition': get_promotion_condition,
        'promotion_amonut': get_promotion_amount,
    }
    for column, extractor in discount_features.items():
        enriched[column] = enriched['Discount_rate'].apply(extractor)

    def row_date_gap(row):
        return get_date_diff(row["Date_received"], row["Date"])

    enriched['date_gap'] = enriched[['Date_received', 'Date']].apply(row_date_gap, axis=1)
    return enriched

### tool function

### feature extraction

In [65]:
# Build the feature-augmented copies of both training frames.
offline_train_d, online_train_d = map(add_feature, (offline_train, online_train))