In [15]:
import pandas as pd
import pytimetk as tk


In [3]:
# Accomodate raw path to variables
raw_customer, raw_orders = "./input/customers.csv", "./input/orders.csv"
raw_products, raw_sales = "./input/products.csv", "./input/sales.csv"

# Read-in data
customer, order = pd.read_csv(raw_customer), pd.read_csv(raw_orders)
product, sales_data = pd.read_csv(raw_products), pd.read_csv(raw_sales)

In [5]:
print(order['order_date'].min())
print(order['order_date'].max())

2021-1-1
2021-9-9


In [11]:
order

Unnamed: 0,order_id,customer_id,payment,order_date,delivery_date
0,1,64,30811,2021-8-30,2021-09-24
1,2,473,50490,2021-2-3,2021-02-13
2,3,774,46763,2021-10-8,2021-11-03
3,4,433,39782,2021-5-6,2021-05-19
4,5,441,14719,2021-3-23,2021-03-24
...,...,...,...,...,...
995,996,345,37843,2021-1-13,2021-02-02
996,997,346,53831,2021-1-18,2021-01-31
997,998,407,53308,2021-5-5,2021-05-21
998,999,428,31643,2021-6-15,2021-07-12


In [18]:
df = order.copy()
df['order_date'] = pd.to_datetime(df['order_date'])

In [20]:
(df[['order_date','payment']].summarize_by_time(
    date_column = 'order_date',
    value_column = 'payment',
    agg_func = 'sum',
    freq = 'M'
)
 .plot_timeseries('order_date','payment'))

# Aggregation Models

In [25]:
customer_sales_1_df = (df
                       .groupby(['customer_id', 'order_id'])
                       .agg(
                           total_sales_basket = ('payment','sum'),
                           timestamp = ('order_date', 'min')
                       )
                       .reset_index()
                       .groupby('customer_id')
                       .agg(
                           time_days = ('timestamp', lambda x: (x.max() - x.min())),
                           frequency = ('order_id', 'nunique'),
                           total_sales=('total_sales_basket', 'sum'),
                           avg_sales=('total_sales_basket','mean')
                       )
                       .reset_index()
                       )
customer_sales_1_df

Unnamed: 0,customer_id,time_days,frequency,total_sales,avg_sales
0,1,45 days,3,70389,23463.0
1,7,0 days,1,48935,48935.0
2,10,0 days,1,45626,45626.0
3,11,0 days,1,41952,41952.0
4,12,0 days,1,39451,39451.0
...,...,...,...,...,...
612,994,0 days,1,58159,58159.0
613,995,0 days,1,58829,58829.0
614,996,178 days,2,52028,26014.0
615,998,195 days,4,101274,25318.5


In [30]:
summary_1 = {
    'average_sales': customer_sales_1_df['avg_sales'].mean(),
    'average_purchase_freq': customer_sales_1_df['frequency'].mean(),
    'churn_rate': 1 - (customer_sales_1_df['frequency'] < 2 ).sum() / len(customer_sales_1_df['frequency']),
    'max_days': customer_sales_1_df['time_days'].max()
}

summary_1 = pd.DataFrame([summary_1])
summary_1

Unnamed: 0,average_sales,average_purchase_freq,churn_rate,max_days
0,33869.854862,1.620746,0.426256,292 days


In [41]:
1-((customer_sales_1_df['frequency'] >= 2 ).sum() /
 customer_sales_1_df['frequency'].sum())

0.737

In [42]:
1-((customer_sales_1_df['frequency'] >= 2 ).sum() /
 len(customer_sales_1_df['frequency']))

0.573743922204214