In [1]:
####
# Creator;Maintainer: Tamas Szuromi - tromika@gmail.com
# The model is documented in the CLV notebook
####

In [14]:
import numpy as np
import pandas as pd

from lifetimes import BetaGeoFitter, ModifiedBetaGeoFitter, GammaGammaFitter
from lifetimes.utils import summary_data_from_transaction_data

In [8]:
# Utilities
def dateCorrect(df):
    """Convert the ``purchase_date`` column of ``df`` to pandas datetimes.

    The parsed column is assigned back onto the frame that was passed in
    (the caller's object is modified), and that same frame is returned.
    """
    parsedDates = pd.to_datetime(df['purchase_date'])
    df['purchase_date'] = parsedDates
    return df

In [17]:
#######
# Predictor function
######
# Fit the Modified Beta-Geometric (ModifiedBetaGeoFitter) and the Gamma-Gamma (GammaGammaFitter) models
#
# Parameters:
#            train: training data location (csv expected in the given format)
#            test: test data location (csv expected with the same format)
#            fcPeriod: forecast horizon. The model works on monthly data (freq='M'), so this is the number of months to forecast
#
# Return:
#         Predicted sales amount per transactions
#         Predicted transactions 
#         Predicted sales amount
#        


def predictor(train, test, fcPeriod=12):
    """Fit the transaction-count and spend models and forecast total sales.

    Steps:
      1. Read both CSV files and parse their ``purchase_date`` column.
      2. Drop training rows with non-positive ``sales_amount`` and rows above
         the 99.995th percentile of ``sales_amount`` or the 99.99th percentile
         of ``quantity``. The test rows are not filtered.
      3. Collapse rows to one record per (``contact_id``, ``order_id``):
         summed ``sales_amount`` and the first ``purchase_date``.
      4. Build monthly (``freq='M'``) frequency/recency/T summaries.
      5. Fit ``ModifiedBetaGeoFitter`` on all training customers and
         ``GammaGammaFitter`` on training customers with ``frequency > 0``.
      6. Combine the mean expected spend per transaction (training customers)
         with the summed expected number of purchases (test customers).

    Parameters:
        train: location of the training CSV. The code reads the columns
            ``contact_id``, ``order_id``, ``sales_amount``, ``quantity`` and
            ``purchase_date``.
        test: location of the test CSV. The code reads ``contact_id``,
            ``order_id``, ``sales_amount`` and ``purchase_date``.
        fcPeriod: forecast horizon passed to the purchase-count model; the
            summaries are monthly, so this is a number of months.

    Returns:
        A tuple ``(monetaryPerTransaction, predTransactions, predSales)``
        where ``predSales = predTransactions * monetaryPerTransaction``.
        The same three values are also printed.
    """
    train = dateCorrect(pd.read_csv(train))
    test = dateCorrect(pd.read_csv(test))

    # Outlier caps are taken from the raw training rows, before any filtering.
    salesCap = np.percentile(train['sales_amount'], [99.995])[0]
    quantityCap = np.percentile(train['quantity'], [99.99])[0]
    keepRows = (
        (train.sales_amount > 0)
        & (train.sales_amount <= salesCap)
        & (train.quantity <= quantityCap)
    )

    # One record per order: total amount and the order's first purchase date.
    orderKeys = ['contact_id', 'order_id']
    orderAgg = {'sales_amount': 'sum', 'purchase_date': 'first'}
    trainFiltered = train[keepRows].groupby(orderKeys).agg(orderAgg).reset_index()
    testFiltered = test.groupby(orderKeys).agg(orderAgg).reset_index()

    # The observation window ends at the latest purchase date of the
    # unfiltered frame, so removed outlier rows still count towards it.
    trainingSet = summary_data_from_transaction_data(
        trainFiltered,
        'contact_id',
        'purchase_date',
        monetary_value_col='sales_amount',
        freq='M',
        observation_period_end=train['purchase_date'].max())
    testSet = summary_data_from_transaction_data(
        testFiltered,
        'contact_id',
        'purchase_date',
        freq='M',
        observation_period_end=test['purchase_date'].max())

    mbgf = ModifiedBetaGeoFitter(penalizer_coef=0.0001)
    mbgf.fit(trainingSet['frequency'], trainingSet['recency'], trainingSet['T'])

    # The spend model is fitted only on customers with at least one repeat
    # purchase (frequency > 0) ...
    returningCustomers = trainingSet[trainingSet['frequency'] > 0]
    ggf = GammaGammaFitter(penalizer_coef=0.0001)
    ggf.fit(returningCustomers['frequency'],
            returningCustomers['monetary_value'])

    # ... but the expected spend is averaged over every training customer.
    monetaryPerTransaction = ggf.conditional_expected_average_profit(
        trainingSet['frequency'],
        trainingSet['monetary_value']
    ).mean()

    predTransactions = mbgf.conditional_expected_number_of_purchases_up_to_time(
        fcPeriod,
        testSet['frequency'],
        testSet['recency'],
        testSet['T']
    ).sum()

    predSales = predTransactions * monetaryPerTransaction

    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print("Predicted sales amount per transactions: %s, Predicted transactions: %s, Predicted sales amount: %s " % (
        monetaryPerTransaction,
        predTransactions,
        predSales
    ))

    return monetaryPerTransaction, predTransactions, predSales

In [18]:
# Run the forecast with the default 12-period horizon.
# NOTE(review): the training file is passed as both `train` and `test` --
# confirm this is intended rather than a separate test file.
predictor(
    train="../data/raw/training.csv",
    test="../data/raw/training.csv",
)

Predicted sales amount per transactions: 106.544355994, Predicted transactions: 58680.0565726, Predicted sales amount: 6252028.8372 
