# Machine Learning Models
## Load Libraries

In [1]:
import pandas as pd
import pytimetk as tk

import pycaret.classification as clf
import pycaret.regression as reg

In [13]:
# constants 
profit_margin = 0.15 #15% Profit on Products

In [2]:
# Helper Function
from datetime import datetime

# differences in months.
def diff_month(d1, d2):
    if ((d1.year - d2.year) * 12 + d1.month - d2.month) == 0:
        return 1
    else:
        return (d1.year - d2.year) * 12 + d1.month - d2.month

In [3]:
# Helper Functions

# Converting Churn Rates:
###########################################################
# To convert annual churn to a monthly churn rate:
def churn_mthly_2_annual(mthly_churn_rate):
    return (1.0-((1.0-mthly_churn_rate)**(12)))

# To convert monthly churn to an annual churn rate:
def churn_annual_2_mthly(annual_churn_rate):
    return (1.0-((1.0-annual_churn_rate)**(1/12.0)))

# To convert churn rate to any period:
def churn_any_period_2_mthly(period_churn_rate, num_mths_in_period):
    return (1.0-((1.0-period_churn_rate)**(1/num_mths_in_period)))


# Customer Lifetime Value
###########################################################
# Customer Lifetime Months 
def cust_lifetime_in_mths(MonthlyChurnRate):
    return (1 / MonthlyChurnRate)

# Customer Lifetime Years
def cust_lifetime_in_years(MonthlyChurnRate):
    return (cust_lifetime_in_mths(MonthlyChurnRate) / 12.0)

# Monthly Churn Rate % from customer lifetime in months.
def mthly_churn_rate_from_mthly_lifetime(CustomerLifetimeMonths):
    return (1 / CustomerLifetimeMonths)

# Monthly Churn Rate % from customer lifetime in years.
def mthly_churn_rate_from_yearly_lifetime(CustomerLifetimeYears):
    return (1 / (CustomerLifetimeYears * 12))
    
# Decay
###########################################################
# Want to know how many customers will remain after 6, 12, 24 months?
# Here is how to calculate how many customers in a cohort will be left at Month X, 
# using exponential decay.

def custs_remaining(MonthlyChurnRate, StartingCohortCount, NumberOfMonthsInTheFuture):
    return (StartingCohortCount * ((1 - MonthlyChurnRate) ^ NumberOfMonthsInTheFuture))


# Calculate Churn
###########################################################
# Annual Churn Rate
def annual_churn_rate(CohortYearStartCount, CohortYearEndCount):
    return ((CohortYearStartCount - CohortYearEndCount) / CohortYearStartCount)

# Monthly Churn Rate
def mthly_churn_rate(CohortMonthStartCount, CohortMonthEndCount):
    return ((CohortMonthStartCount - CohortMonthEndCount) / CohortMonthStartCount)

# Any Period Churn Rate
def any_period_churn_rate(CohortPeriodStartCount, CohortPeriodEndCount):
    return ((CohortPeriodStartCount - CohortPeriodEndCount) / CohortPeriodStartCount)


## Import Data

In [4]:
# Accomodate raw path to variables
raw_customer, raw_orders = "./input/customers.csv", "./input/orders.csv"
raw_products, raw_sales = "./input/products.csv", "./input/sales.csv"

# Read-in data
customer, order = pd.read_csv(raw_customer), pd.read_csv(raw_orders)
product, sales_data = pd.read_csv(raw_products), pd.read_csv(raw_sales)

In [5]:
print(order['order_date'].min())
print(order['order_date'].max())

2021-1-1
2021-9-9


In [6]:
order.head(3)

Unnamed: 0,order_id,customer_id,payment,order_date,delivery_date
0,1,64,30811,2021-8-30,2021-09-24
1,2,473,50490,2021-2-3,2021-02-13
2,3,774,46763,2021-10-8,2021-11-03


In [7]:
df = order.copy()
df['order_date'] = pd.to_datetime(df['order_date'])

In [8]:
# Select a cohort
df1 = df[(df['order_date']>=datetime(2021, 4, 1))&
         (df['order_date']<datetime(2021, 5, 1))]
df1.head(3)

Unnamed: 0,order_id,customer_id,payment,order_date,delivery_date
6,7,626,37666,2021-04-05,2021-04-11
7,8,58,28484,2021-04-12,2021-05-01
20,21,124,11261,2021-04-04,2021-04-17


## EDA

In [9]:
(df[['order_date','payment']].summarize_by_time(
    date_column = 'order_date',
    value_column = 'payment',
    agg_func = 'sum',
    freq = 'M'
)
 .plot_timeseries('order_date','payment'))

In [10]:
(df[['order_date','payment']].summarize_by_time(
    date_column = 'order_date',
    value_column = 'payment',
    agg_func = 'sum',
    freq = 'D'
)
 .plot_timeseries('order_date','payment'))

In [11]:
(df1[['order_date','payment']].summarize_by_time(
    date_column = 'order_date',
    value_column = 'payment',
    agg_func = 'sum',
    freq = 'D'
)
 .plot_timeseries('order_date','payment'))

# Machine Learning Models
### Task:
- What will be the customers spend in the next 90 days? (Regression Problem)
- What is the probablilty for a customer to make another purchase in the next 90 days? (Classification Problem) 

In [12]:
# I'm a 53:28