In [None]:
import os
import pandas as pd
import numpy as np
from datetime import date, datetime, timedelta
# Run date as a compact YYYYMMDD string; it is prepended to the exported file names below.
today = format(date.today(), "%Y%m%d")

import gc

In [None]:
### Preliminary functions and imports

def customer_category_attribution(x):
    """Label a client from an order count.

    Returns 'One-timer' when ``x`` equals 1, 'Two-timer' when it equals 2,
    and 'Recurring' for every other value.
    """
    if x == 1:
        return 'One-timer'
    return 'Two-timer' if x == 2 else 'Recurring'

def flatten(table):
    """Return ``table`` as a flat DataFrame whose index has become a regular column.

    Parameters
    ----------
    table : pandas.DataFrame or pandas.Series
        Typically the result of a ``groupby(...).agg(...)``.

    Returns
    -------
    pandas.DataFrame
        * Two-level (MultiIndex) columns: every column is renamed
          ``"<level0>_<level1>"``, the index values become the first column
          (named after ``table.index.name``) and rows are renumbered 0..n-1.
        * Otherwise: the input with index level 0 moved into a column.

    The input object is never modified.
    """
    if isinstance(table.columns, pd.MultiIndex):
        flat = table.copy()
        # Only the first two column levels take part in the flattened name.
        flat.columns = [str(t[0]) + '_' + str(t[1]) for t in table.columns]
        # Vectorised replacement for the former row-by-row ``df.loc[i] = row``
        # build, which was slow on large tables and turned every column into
        # object dtype.
        flat.insert(0, table.index.name, table.index.to_numpy())
        return flat.reset_index(drop=True)

    # Non-inplace reset so the caller's object cannot be altered through a
    # DataFrame wrapper that shares its data.
    return pd.DataFrame(table).reset_index(level=0)

# Article reference table; joined to the order lines on (CODSAI, REFSTK) below.
df_articles = pd.read_csv(
    './Vertbaudet/France/src/Original/Articles/Export_FR_Articles_Julie.csv',
    usecols=["DEPARTEMENT_NIV2", "CODSAI", "REFSTK", "TOP_MN_Max"],
)

# Order -> CODSOC mapping; joined on NUMCDE and filtered on CODSOC == 0 below.
df_codsoc = pd.read_csv('./Vertbaudet/France/src/20210921_CODSOC.csv')

# Per-client attributes (tab-separated export): postcode and first web-order date.
df_client_dept = pd.read_csv(
    './20210923_Export_tab_FR/20210921_INDIVIDUS.tab',
    sep='\t',
    usecols=['IDIND', 'CODPOST', "DATPREMCDEWEB"],
)

# Clients whose IDIND appears here are removed from the order lines below.
clients_mag = pd.read_csv(
    "./Vertbaudet/France/src/20211004_UNIQUE_IDIND_TICKET.csv",
    sep=",",
)

# Per-client 'Mediane' value produced by an earlier run (Output_old).
df_client = pd.read_csv(
    './Vertbaudet/France/Output_old/iris_med_rev_clients.csv',
    usecols=['IDIND', 'Mediane'],
)

# INSEE table: CODGEO code and MED18 value.
df_IRIS_by_dep = pd.read_excel(
    './Vertbaudet/France/INSEE_data/INSEE_IRIS_by_dep.xlsx',
    usecols=['CODGEO', 'MED18'],
)

# Load data

In [None]:
gc.collect()

# Columns read from each yearly order-line export, and columns kept after filtering.
ORDER_LINE_COLUMNS = ['IDIND', 'IDCLI', 'IDFOYER', 'DATCDE', 'DN', 'DBI', 'QTE', 'NUMCDE', 'CODSAI', 'REFSTK']
KEPT_COLUMNS = ['IDIND', 'DN', 'DBI', 'NUMCDE', 'DATCDE', 'DATPREMCDEWEB', 'DEPARTEMENT_NIV2', 'QTE', 'Mediane', 'COHORT', 'TOP_MN_Max']

# '$null$' is the missing-value marker of the exports. The client table does not
# depend on the year, so it is normalised once instead of once per iteration.
df_client_dept_clean = df_client_dept.replace({'$null$': np.nan})

# Filtered yearly frames are collected and concatenated once at the end;
# growing `df` with pd.concat inside the loop re-copies all previous years each time.
yearly_frames = []
for year in range(2017, 2022):
    print(year)
    gc.collect()
    df_tmp = pd.read_csv('./Vertbaudet/France/src/Original/Lignes_commandes/Export_FR_LIGNECOMMANDE_'+str(year)+'.csv',
                         encoding='unicode_escape',
                         usecols=ORDER_LINE_COLUMNS)

    ### Merge with article table to get DEPARTEMENT_NIV2
    df_tmp = df_tmp.merge(df_articles, how='left', on=["CODSAI", "REFSTK"])

    ### Replace $null$
    df_tmp = df_tmp.replace({'$null$': np.nan})
    ### Clean IDIND index
    df_tmp['IDIND'] = df_tmp['IDIND'].astype(float)
    df_tmp['IDFOYER'] = df_tmp['IDFOYER'].astype(float)

    ### Filter mag clients
    df_tmp = df_tmp[~df_tmp['IDIND'].isin(clients_mag.IDIND)]

    ### Filter non-french command with CODSOC
    df_tmp = df_tmp.merge(df_codsoc, how='left', on='NUMCDE')
    df_tmp = df_tmp[df_tmp['CODSOC'] == 0]

    #### Merge to get Mediane rev
    df_tmp = df_tmp.merge(df_client, how='left', on='IDIND')

    ### add departments and cohort
    df_tmp = df_tmp.merge(df_client_dept_clean, how='left', on='IDIND')
    # The former `DATPREMCDEWEB != '$null$'` filter was a no-op: the marker is
    # already replaced by NaN, and NaN != '$null$' is True. Rows without a first
    # web-order date are removed by the cohort filter below (NaN fails both comparisons).
    df_tmp['DATCDE'] = pd.to_datetime(df_tmp["DATCDE"])
    df_tmp['DATPREMCDEWEB'] = pd.to_datetime(df_tmp["DATPREMCDEWEB"])
    #df_tmp['DEPARTEMENT'] = df_tmp['CODPOST'].apply(lambda x : str(x)[:2])
    df_tmp['COHORT'] = df_tmp['DATPREMCDEWEB'].dt.year

    ### filter cohort
    df_tmp = df_tmp[(df_tmp['COHORT'] >= 2017) & (df_tmp['COHORT'] < 2021)]

    ### filter on DN <= DB
    df_tmp = df_tmp[df_tmp['DN'] <= df_tmp['DBI']]
    df_tmp = df_tmp[df_tmp['DN'] >= 0]

    yearly_frames.append(df_tmp[KEPT_COLUMNS])

    del df_tmp
    gc.collect()

df = pd.concat(yearly_frames)

del yearly_frames, df_client_dept_clean
gc.collect()

### Adding fields

In [8]:
# Exclude B2B clients: keep only rows whose client has TB2B == 0 in the B2B file.
# Clients absent from that file get NaN after the left merge and are dropped as well.
df_b2b = pd.read_csv('./Vertbaudet/France/src/20211012_TB2B.csv')
for b2b_col in ['IDIND', 'TB2B']:
    df_b2b[b2b_col] = df_b2b[b2b_col].astype(float)

df = (
    df.merge(df_b2b, how='left', on='IDIND')
      .loc[lambda merged: merged['TB2B'] == 0]
      .drop(columns=['TB2B'])
)
del df_b2b

In [9]:
### Add CLIENT_CATEGORY
# NB_ORDERS = number of distinct NUMCDE per client, then mapped to a label.
nb_commande = flatten(
    df.groupby(['IDIND']).agg({'NUMCDE': pd.Series.nunique})
)
nb_commande.columns = ['IDIND', 'NB_ORDERS']
df = pd.merge(df, nb_commande, on='IDIND', how='left', suffixes=(False, False))
df['CLIENT_CATEGORY'] = df['NB_ORDERS'].apply(customer_category_attribution)

In [29]:
### Add DEPARTEMENT_CMD field
# Step 1: sum DN per (order, department).
cat_order = df.groupby(['NUMCDE', 'DEPARTEMENT_NIV2']).agg({'DN': sum, 'DEPARTEMENT_NIV2': 'first'})
# Step 2: sort by summed DN (largest first) and keep, per order, the first department,
# i.e. the department with the highest DN in that order.
# NOTE(review): groupby('NUMCDE') here relies on NUMCDE being an index level of cat_order.
cat_order = cat_order.sort_values(by='DN',ascending=False).groupby('NUMCDE').agg({'DEPARTEMENT_NIV2': 'first'})
cat_order = cat_order.rename(columns = {'DEPARTEMENT_NIV2': 'DEPARTEMENT_CMD'})

# Attach the order-level department to every order line.
df = pd.merge(df,cat_order, how='left', on='NUMCDE', right_index = False,  suffixes=(False,False))

del cat_order
gc.collect()

93

In [30]:
### Add DEPARTEMENT_FIRST (majority department of first command)
# Earliest row per client (by DATCDE) carries the DEPARTEMENT_CMD of the first order.
cat_rec = (
    df.sort_values(by='DATCDE', ascending=True)
      .drop_duplicates(subset='IDIND', keep='first', inplace=False)
      [['IDIND', 'DEPARTEMENT_CMD']]
      .rename(columns={'DEPARTEMENT_CMD': 'DEPARTEMENT_FIRST'})
)
df = pd.merge(df, cat_rec, on='IDIND', how='left', suffixes=(False, False))

In [9]:
### Add DEPARTEMENT_MAJORITAIRE
# Per client, the department with the largest summed DN.
univ_maj = (
    df.groupby(['IDIND', 'DEPARTEMENT_NIV2'])
      .agg({'DN': 'sum'})
      .reset_index()
      .sort_values(by='DN', ascending=False)
      .drop_duplicates(subset='IDIND', keep='first', inplace=False)
)
univ_maj.columns = ['IDIND', 'DEPARTEMENT_MAJORITAIRE', 'DN']

df = pd.merge(
    df,
    univ_maj[['IDIND', 'DEPARTEMENT_MAJORITAIRE']],
    on='IDIND',
    how='left',
)

del univ_maj

- NB UNIV

In [None]:
def map_nb_univ(x):
    """Turn a department count into a label: '1', '2', '3' or '4+'.

    Any other value (below 1, non-integer below 4, NaN) yields None.
    """
    if x >= 4:
        return '4+'
    for count, label in ((1, '1'), (2, '2'), (3, '3')):
        if x == count:
            return label

### Create nb_univ field
# NB_UNIV = number of distinct DEPARTEMENT_NIV2 values per client.
df_nb_univ = (
    df.groupby('IDIND')
      .agg({'DEPARTEMENT_NIV2': pd.Series.nunique})
      .rename(columns={'DEPARTEMENT_NIV2': 'NB_UNIV'})
)
df = df.merge(df_nb_univ, how='left', on='IDIND')

del df_nb_univ
gc.collect()

# Bucketed version of NB_UNIV ('1', '2', '3', '4+').
df['NB_UNIV_CAT'] = df['NB_UNIV'].apply(map_nb_univ)

- Compte de commande

In [11]:
# One row per order (NUMCDE) with its client and date, taken from the earliest line.
nb_order = flatten(
    df.sort_values(by='DATCDE', ascending=True)
      .groupby(['NUMCDE'])
      .agg({'IDIND': 'first', 'DATCDE': 'first'})
)
# ORDER_NUMBER = 1-based rank of the order among the client's orders, by date.
nb_order['ORDER_NUMBER'] = 1 + (
    nb_order.sort_values(by='DATCDE', ascending=True)
            .groupby(['IDIND'])
            .cumcount()
)
df = pd.merge(df, nb_order[['NUMCDE', 'ORDER_NUMBER']], on='NUMCDE', how='left')

- LTV

In [17]:
from datetime import datetime, time, date, timedelta

# Add DAT_12 and DAT_24 columns: first web-order date plus 365 / 730 days.
for window_col, window_days in (('DAT_12', 365), ('DAT_24', 730)):
    df[window_col] = df['DATPREMCDEWEB'] + timedelta(days=window_days)

### Add LTV_12 then LTV_24 columns: per client, DN summed over the orders
### placed on or before the matching window end date.
for window_col, ltv_col in (('DAT_12', 'LTV_12'), ('DAT_24', 'LTV_24')):
    ltv = (
        df[df['DATCDE'] <= df[window_col]][['IDIND', 'DN']]
        .groupby('IDIND')
        .agg({'DN': sum})
        .rename(columns={'DN': ltv_col})
    )
    print(ltv[ltv_col].mean())
    df = df.merge(ltv, how='left', on='IDIND')

    del ltv

gc.collect()

118.4290346331584
145.9997675242216


0

- Churn delay

In [28]:
### Add churn_delay field
# Per client: last order date (rows sorted by DATCDE, 'last' taken) and first web-order date.
df_churn = df.sort_values(by = 'DATCDE', ascending = True).groupby('IDIND').agg({'DATCDE': 'last', 'DATPREMCDEWEB': 'last', 'IDIND':'last'})
# Date of each client's order ranked 2, exposed as DATCDE_2.
df_second = df[df['ORDER_NUMBER'] == 2].drop_duplicates('IDIND')[['IDIND', 'DATCDE']].rename(columns = {'DATCDE': 'DATCDE_2'})
# NOTE(review): both sides carry an IDIND column while the join uses the left index;
# the later selection of 'IDIND' depends on how pandas names the key column here — confirm.
df_churn = df_churn.merge(df_second, how = 'left', left_index = True,  right_on = 'IDIND')
df_churn = df_churn.reset_index()
# CHURN_DELAY   = (last order + 365 days) - first web order
# CHURN_DELAY_2 = (last order + 365 days) - second order (missing when there is no second order)
df_churn['CHURN_DELAY'] = (df_churn['DATCDE'] + timedelta(days=365)) - df_churn['DATPREMCDEWEB']
df_churn['CHURN_DELAY_2'] = (df_churn['DATCDE'] + timedelta(days=365)) - df_churn['DATCDE_2']
# Drop the helper column created by reset_index().
df_churn = df_churn.drop(columns = 'index')

df = df.merge(df_churn[['IDIND','CHURN_DELAY_2', 'CHURN_DELAY']], how = 'left', left_on = 'IDIND', right_on  = 'IDIND')

del df_churn
del df_second

# Convert the timedeltas to a number of days.
df['CHURN_DELAY'] = df['CHURN_DELAY'].apply(lambda x : x.days)
df['CHURN_DELAY_2'] = df['CHURN_DELAY_2'].apply(lambda x : x.days)

gc.collect()


0

- Discount category

In [73]:
def categorize_distribution_discount(x):
    """Bucket a discount rate (fraction of the gross price) into a label.

    0 -> 'No discount'; otherwise the half-open bracket (low, high] containing
    ``x``; above 0.9 -> '>90%'. Negative values and NaN fall in no bracket
    and yield None.
    """
    if x == 0.:
        return 'No discount'

    brackets = (
        (0, 0.13, '0-13%'),
        (0.13, 0.16, '13-16%'),
        (0.16, 0.3, '16-30%'),
        (0.3, 0.5, '30-50%'),
        (0.5, 0.7, '50-70%'),
        (0.7, 0.9, '70-90%'),
    )
    for low, high, label in brackets:
        if low < x <= high:
            return label

    if x > 0.9:
        return '>90%'
    
### Discount by article
# Absolute discount, discount rate relative to DBI, and its bucket label.
df['DISCOUNT'] = df['DBI'] - df['DN']
df['DISCOUNT_PCT'] = 1 - df['DN'] / df['DBI']
df['DISCOUNT_CAT'] = df['DISCOUNT_PCT'].apply(categorize_distribution_discount)

gc.collect()

821

In [76]:
### Add DISCOUNT_CAT_CMD: discount bucket computed at order level
# The column was previously created as 'DISCOUNT_CMD_CAT' but merged (and used by
# the analysis cells further down) as 'DISCOUNT_CAT_CMD', which raised a KeyError.
# The name used by the consumers is now used throughout.
df_discount_cmd = df.groupby('NUMCDE').agg({'DN': 'sum', 'DBI': 'sum'})
# Order-level discount rate = 1 - (sum of DN / sum of DBI) over the order's lines.
df_discount_cmd['DISCOUNT_CMD_PCT'] = 1 - (df_discount_cmd['DN'] / df_discount_cmd['DBI'])
df_discount_cmd['DISCOUNT_CAT_CMD'] = df_discount_cmd['DISCOUNT_CMD_PCT'].apply(categorize_distribution_discount)
df = df.merge(df_discount_cmd['DISCOUNT_CAT_CMD'], how='left', on='NUMCDE')

gc.collect()

37

- Seniority

In [14]:
# SENIORITY = Seniority of a client as of today
# (whole days between the first web order and the moment the cell runs, so it changes from run to run)
df['SENIORITY'] = (datetime.today() - df['DATPREMCDEWEB']).dt.days

In [112]:
# SENIORITY_CMD = Seniority of the order
# (whole days between the client's first web order and the order date of the line)
df['SENIORITY_CMD'] = (df.DATCDE - df.DATPREMCDEWEB).dt.days

In [31]:
# Remove a SENIORITY_ABS column left by a previous run so the next cell can be
# re-executed; errors='ignore' keeps this cell from failing on a fresh kernel,
# where the column does not exist yet.
df = df.drop(columns='SENIORITY_ABS', errors='ignore')

In [32]:
### Seniority abs = Churn date - first web order
# Churn date is taken as the client's latest order date + 365 days.
df_last_cmd = (
    df[['IDIND', 'DATCDE', 'DATPREMCDEWEB']]
    .sort_values(by='DATCDE', ascending=False)
    .groupby('IDIND')
    .agg({'DATCDE': 'first', 'DATPREMCDEWEB': 'first'})
)
df_last_cmd['SENIORITY_ABS'] = 365 + (
    df_last_cmd['DATCDE'] - df_last_cmd['DATPREMCDEWEB']
).dt.days
df = df.merge(df_last_cmd['SENIORITY_ABS'], how='left', on='IDIND')

del df_last_cmd
gc.collect()

0

- Acquisition channel

In [None]:
# Tracking codes (tab-separated); NUMCDE and CODE_TRACKING are used below.
df_code_tracking = pd.read_csv(
    './Vertbaudet/France/src/20210928_CodeTracking.tab',
    sep='\t',
)

# Lookup sheet keyed on CODE_TRACKING; supplies the lever columns selected below.
df_input_code_tracking = pd.read_excel(
    './Vertbaudet/France/src/INPUT_CODES_TRACKING.xlsx',
    sheet_name='CODES_TRACKING',
)

df_code_tracking = df_code_tracking.merge(
    df_input_code_tracking,
    how='left',
    on='CODE_TRACKING',
)

# Attach LEVIER / NEW_LEVIER to every order line of the matching order.
df = df.merge(
    df_code_tracking[['NUMCDE', 'LEVIER', 'NEW_LEVIER']],
    how='left',
    on='NUMCDE',
)

- Opt-in

In [13]:
def optin_category(x):
    """Classify a client's e-mail opt-in status.

    Parameters
    ----------
    x : sequence or pandas.Series
        Read by position: (first web-order date, last opt-in update date,
        current opt-in flag).

    Returns
    -------
    "Have already been Opt-in" when the first order precedes the last opt-in
    update; otherwise "Never Opt-in" for a flag of 0, "Always Opt-in" for a
    flag of 1, and the raw flag for any other value (e.g. NaN).
    """
    # Materialise the values so the lookups below are purely positional.
    # ``x[0]`` on a Series with string labels relies on a positional fallback
    # that pandas has deprecated.
    values = tuple(x)
    if values[0] < values[1]:
        return "Have already been Opt-in"
    elif values[2] == 0.:
        return "Never Opt-in"
    elif values[2] == 1.:
        return "Always Opt-in"
    return values[2]

In [14]:
# NOTE(review): absolute, user-specific path — the other inputs are read from relative paths.
optin = (
    pd.read_csv(
        "C:/Users/pierrick/eleven/Engagements - Vertbaudet/3. Data received/3. Extract/1. France/20211025_ADRESSABLITE.csv",
        sep=",",
    )
    .replace({"$null$": np.nan})
)
# Unparseable dates become NaT; the opt-in flag is cast to float.
optin['DATMAJOPTMMAIL'] = pd.to_datetime(optin['DATMAJOPTMMAIL'], errors="coerce")
optin['TOPTINMMAIL'] = optin['TOPTINMMAIL'].astype('float')

df = df.merge(
    optin[['IDIND', 'DATMAJOPTMMAIL', 'TOPTINMMAIL']],
    on='IDIND',
    how='left',
)

# Row-wise classification from (first web order, last opt-in update, opt-in flag).
df['OPTIN'] = df[['DATPREMCDEWEB', 'DATMAJOPTMMAIL', 'TOPTINMMAIL']].apply(optin_category, axis=1)

- Segmentation selon la fréquence

In [15]:
def segmentation_freq(x):
    """Bucket a gap in days between two orders into a coarse label.

    Every bucket includes its lower bound and excludes its upper bound.
    NaN (no gap available) matches no bucket and yields None.
    """
    if x < 30:
        return '< 1 month'
    elif x < 90:
        return '1-3 months'
    elif x < 180:
        return '3-6 months'
    elif x < 270:
        return '6-9 months'
    elif x < 365:
        return '9-12 months'
    elif x >= 365:
        # Was ``x > 365``: a gap of exactly 365 days matched no branch and
        # silently returned None. ``>=`` closes the hole; NaN still yields None.
        return '> 12 months'

In [16]:
def segmentation_small_freq(x):
    """Bucket a gap in days between two orders, with weekly detail in the first month.

    Every bucket includes its lower bound and excludes its upper bound.
    NaN (no gap available) matches no bucket and yields None.
    """
    if x < 7:
        return '< 1 week'
    elif x < 14:
        return '1-2 weeks'
    elif x < 21:
        return '2-3 weeks'
    elif x < 30:
        return '3-4 weeks'
    elif x < 90:
        return '1-3 months'
    elif x < 180:
        return '3-6 months'
    elif x < 270:
        return '6-9 months'
    elif x < 365:
        return '9-12 months'
    elif x >= 365:
        # Was ``x > 365``: a gap of exactly 365 days matched no branch and
        # silently returned None. ``>=`` closes the hole; NaN still yields None.
        return '> 12 months'

In [17]:
# Date of each client's first order (ORDER_NUMBER == 1), one row per client.
df_first = (
    df[df['ORDER_NUMBER'] == 1]
    .drop_duplicates('IDIND')
    [['IDIND', 'DATCDE']]
)

# Date of each client's second order, exposed as DATCDE_2.
df_second = (
    df[df['ORDER_NUMBER'] == 2]
    .drop_duplicates('IDIND')
    [['IDIND', 'DATCDE']]
    .rename(columns={'DATCDE': 'DATCDE_2'})
)

df_first = df_first.merge(df_second, how='left', on='IDIND')

# GAP = days between first and second order (NaN when there is no second order).
df_first['GAP'] = (df_first['DATCDE_2'] - df_first['DATCDE']).dt.days

df_first['FREQ_CAT'] = df_first['GAP'].apply(segmentation_freq)
df_first['FREQ_CAT_SMALL'] = df_first['GAP'].apply(segmentation_small_freq)

# Attach both bucket labels to every order line of the client.
for freq_col in ('FREQ_CAT', 'FREQ_CAT_SMALL'):
    df = df.merge(df_first[['IDIND', freq_col]], how='left', on='IDIND')

del df_first
del df_second
gc.collect()

(14, 4)


15

# Analysis

### Opt-in

In [32]:
# Mean CHURN_DELAY per OPTIN category, using one row per client (first row kept per IDIND), written to Excel.
df.drop_duplicates('IDIND').groupby(['OPTIN']).agg({'CHURN_DELAY': 'mean'}).unstack().to_excel('./Vertbaudet/France/Output/'+today+'_churn_delay_by_optin.xlsx')

In [33]:
# Distinct clients per (FREQ_CAT_SMALL, OPTIN), with OPTIN spread across columns.
# NOTE(review): the file name says "feq_cat" — presumably a typo for "freq_cat"; left unchanged so existing outputs keep their name.
df.groupby(['FREQ_CAT_SMALL','OPTIN']).agg({'IDIND': pd.Series.nunique}).unstack().to_excel('./Vertbaudet/France/Output/'+today+'_optin_by_feq_cat_small.xlsx')

### Avg basket, DN, %clients

In [None]:
# Distinct clients and summed DN per FREQ_CAT_SMALL bucket, written to Excel.
df.groupby('FREQ_CAT_SMALL').agg({'IDIND': pd.Series.nunique, 'DN': sum}).to_excel('./Vertbaudet/France/Output/'+today+'_dn_nb_client_by_freq_cat_small.xlsx')

### Similarité 1ere / 2nde commande

In [30]:
### Create NEW_DEP field: number of departments in the second order that were not in the first order

def dif(x, y):
    """Return how many elements of the set ``y`` are absent from ``x``."""
    only_in_y = y.difference(x)
    return len(only_in_y)

# For each client's orders ranked 1 and 2, collect the list of distinct DEPARTEMENT_NIV2 values.
df_list_dept_by_cmd = df[df['ORDER_NUMBER'].isin(range(1,3))].groupby(['IDIND', 'ORDER_NUMBER']).agg({'ORDER_NUMBER': 'first', 'DEPARTEMENT_NIV2': lambda x: list(pd.Series.unique(x))})
# Pair order 1 with order 2 of the same IDIND; the suffixed department lists become DEPART_1 / DEPART_2.
df_cmd_1_2 = df_list_dept_by_cmd[df_list_dept_by_cmd['ORDER_NUMBER']==1].merge(df_list_dept_by_cmd[df_list_dept_by_cmd['ORDER_NUMBER']==2], how = 'left', on = 'IDIND').rename(columns = {'DEPARTEMENT_NIV2_x': 'DEPART_1', 'DEPARTEMENT_NIV2_y': 'DEPART_2'})
# Keep only clients for which both department lists exist.
df_cmd_1_2 = df_cmd_1_2[(df_cmd_1_2.DEPART_1.notna()) & (df_cmd_1_2.DEPART_2.notna())]
# NEW_DEP = number of departments in DEPART_2 that are absent from DEPART_1.
df_cmd_1_2['NEW_DEP'] = df_cmd_1_2.apply(lambda x: dif(set(x.DEPART_1), set(x.DEPART_2)), axis = 1)
# NOTE(review): right_index=True assumes df_cmd_1_2 is indexed by IDIND after the merge above — confirm.
df = df.merge(df_cmd_1_2['NEW_DEP'], how = 'left', left_on = 'IDIND', right_index = True)

In [36]:
# Distinct clients per (FREQ_CAT, NEW_DEP), NEW_DEP spread across columns; missing combinations set to 0.
df_new_dep_cat = df.groupby(['FREQ_CAT', 'NEW_DEP']).agg({'IDIND': pd.Series.nunique}).unstack().replace(np.nan, 0)

In [106]:
# Write the FREQ_CAT x NEW_DEP client counts to a dated Excel file.
df_new_dep_cat.to_excel('./Vertbaudet/France/Output/'+today+'_new_dep_by_freq_cat.xlsx')

In [36]:
# Same count as above with the finer FREQ_CAT_SMALL buckets.
df_new_dep_cat_small_freq = df.groupby(['FREQ_CAT_SMALL', 'NEW_DEP']).agg({'IDIND': pd.Series.nunique}).unstack().replace(np.nan, 0)

In [37]:
# Write the FREQ_CAT_SMALL x NEW_DEP client counts to a dated Excel file.
df_new_dep_cat_small_freq.to_excel('./Vertbaudet/France/Output/'+today+'_new_dep_by_freq_cat_small.xlsx')

- Focus on the '0 new dept / <30 days' clients

In [40]:
# Clients with NEW_DEP == 0 and FREQ_CAT '< 1 month', counted per DEPARTEMENT_MAJORITAIRE.
df[(df.NEW_DEP == 0) & (df.FREQ_CAT == '< 1 month')].groupby('DEPARTEMENT_MAJORITAIRE').agg({'IDIND': pd.Series.nunique}).to_excel('./Vertbaudet/France/Output/'+today+'_0ninf30_univ.xlsx')

In [41]:
# Clients with FREQ_CAT '< 1 month' and NEW_DEP different from 0, counted per DEPARTEMENT_MAJORITAIRE.
# NOTE(review): '!=' also keeps rows where NEW_DEP is NaN — confirm that is intended.
df[(df.NEW_DEP != 0) & (df.FREQ_CAT == '< 1 month')].groupby('DEPARTEMENT_MAJORITAIRE').agg({'IDIND': pd.Series.nunique}).to_excel('./Vertbaudet/France/Output/'+today+'_no0ninf30_univ.xlsx')

In [42]:
# Clients whose FREQ_CAT is not '< 1 month', counted per DEPARTEMENT_MAJORITAIRE.
# Rows with a missing FREQ_CAT also pass the '!=' test.
df[df.FREQ_CAT != '< 1 month'].groupby('DEPARTEMENT_MAJORITAIRE').agg({'IDIND': pd.Series.nunique}).to_excel('./Vertbaudet/France/Output/'+today+'_plus30_univ.xlsx')

### Churn delay

In [None]:
# Mean CHURN_DELAY per FREQ_CAT, using one row per client.
df_churn_delay = df.drop_duplicates('IDIND').groupby('FREQ_CAT').agg({'CHURN_DELAY': 'mean'})

In [None]:
# Write the mean CHURN_DELAY per FREQ_CAT to a dated Excel file.
df_churn_delay.to_excel('./Vertbaudet/France/Output/'+today+'_churn_delay_by_freq_cat.xlsx')

In [18]:
# Mean CHURN_DELAY_2 (delay measured from the second order) per FREQ_CAT, one row per client.
df_churn_delay_2 = df.drop_duplicates('IDIND').groupby('FREQ_CAT').agg({'CHURN_DELAY_2': 'mean'})

In [20]:
# Write the mean CHURN_DELAY_2 per FREQ_CAT to a dated Excel file.
df_churn_delay_2.to_excel('./Vertbaudet/France/Output/'+today+'_churn_delay_2_by_freq_cat.xlsx')

In [50]:
# Mean CHURN_DELAY_2 per FREQ_CAT_SMALL, one row per client.
df_churn_delay_2_small = df.drop_duplicates('IDIND').groupby('FREQ_CAT_SMALL').agg({'CHURN_DELAY_2': 'mean'})

In [51]:
# Write the mean CHURN_DELAY_2 per FREQ_CAT_SMALL to a dated Excel file.
df_churn_delay_2_small.to_excel('./Vertbaudet/France/Output/'+today+'_churn_delay_2_by_freq_cat_small.xlsx')

### Nbr of universes by reorder delay

In [16]:
### Create output df
# Distinct clients per (FREQ_CAT, NB_UNIV_CAT), NB_UNIV_CAT spread across columns.
df_nb_univ_by_freq_cat = df.groupby(['FREQ_CAT', 'NB_UNIV_CAT']).agg({'IDIND': pd.Series.nunique}).unstack()

In [17]:
### to_excel
# Dated export of the FREQ_CAT x NB_UNIV_CAT client counts.
df_nb_univ_by_freq_cat.to_excel('./Vertbaudet/France/Output/'+today+'_nbr_univ_by_freq_cat.xlsx')

In [46]:
# Distinct clients per (FREQ_CAT_SMALL, NB_UNIV_CAT), NB_UNIV_CAT spread across columns.
df_nb_univ_by_freq_cat_small_freq = df.groupby(['FREQ_CAT_SMALL', 'NB_UNIV_CAT']).agg({'IDIND': pd.Series.nunique}).unstack()

In [47]:
# Dated export of the FREQ_CAT_SMALL x NB_UNIV_CAT client counts.
df_nb_univ_by_freq_cat_small_freq.to_excel('./Vertbaudet/France/Output/'+today+'_nbr_univ_by_freq_cat_small.xlsx')

### Recruitment department by reorder delay

In [22]:
# Lines whose order date equals the first web-order date: distinct orders per (FREQ_CAT, DEPARTEMENT_CMD).
# NOTE(review): the equality is exact on the two datetime columns — confirm neither carries a time-of-day part.
df_entry_dept = df[df['DATCDE']==df['DATPREMCDEWEB']].groupby(['FREQ_CAT', 'DEPARTEMENT_CMD']).agg({'NUMCDE': pd.Series.nunique}).unstack()

In [23]:
# Dated export of the entry-order counts per FREQ_CAT and order department.
df_entry_dept.to_excel('./Vertbaudet/France/Output/'+today+'_entry_dept_by_reorder_freq.xlsx')

In [28]:
# Same entry-order count with the finer FREQ_CAT_SMALL buckets.
df_entry_dept_small_freq = df[df['DATCDE']==df['DATPREMCDEWEB']].groupby(['FREQ_CAT_SMALL', 'DEPARTEMENT_CMD']).agg({'NUMCDE': pd.Series.nunique}).unstack()

In [29]:
# Dated export; this file name carries a "_v2" suffix, unlike the other exports.
df_entry_dept_small_freq.to_excel('./Vertbaudet/France/Output/'+today+'_entry_dept_by_reorder_freq_small_v2.xlsx')

- Seniority churn by entry dept

In [33]:
# Mean SENIORITY_ABS per DEPARTEMENT_FIRST, one row per client, written to Excel.
df.drop_duplicates('IDIND').groupby('DEPARTEMENT_FIRST').agg({'SENIORITY_ABS': 'mean'}).to_excel('./Vertbaudet/France/Output/'+today+'_senio_churn_by_dep_first.xlsx')

### Majority department by reorder frequency

In [26]:
# Distinct clients per (FREQ_CAT, DEPARTEMENT_MAJORITAIRE), departments spread across columns.
df_univ_maj = df.groupby(['FREQ_CAT', 'DEPARTEMENT_MAJORITAIRE']).agg({'IDIND': pd.Series.nunique}).unstack()

In [27]:
# Drop the outer 'IDIND' column level left by agg + unstack, keeping only department names.
# Not re-runnable on its own: a second execution finds single-level columns.
df_univ_maj.columns = df_univ_maj.columns.droplevel()

In [28]:
# Dated export of the client counts per FREQ_CAT and majority department.
df_univ_maj.to_excel('./Vertbaudet/France/Output/'+today+'_univ_maj_by_reorder_freq.xlsx')

- Seniority by majority department

In [120]:
# Moving seniority
# Mean SENIORITY_CMD per DEPARTEMENT_NIV2, counting each (order, department) pair once.
df.drop_duplicates(subset = ['NUMCDE', 'DEPARTEMENT_NIV2']).groupby('DEPARTEMENT_NIV2').agg({'SENIORITY_CMD': 'mean'}).to_excel('./Vertbaudet/France/Output/'+today+'_senio_by_dep.xlsx')

In [11]:
# Absolute seniority
# Mean SENIORITY_ABS per DEPARTEMENT_MAJORITAIRE, one row per client.
df.drop_duplicates('IDIND').groupby('DEPARTEMENT_MAJORITAIRE').agg({'SENIORITY_ABS': 'mean'}).to_excel('./Vertbaudet/France/Output/'+today+'_senio_abs_by_dep_maj.xlsx')

In [15]:
# Today seniority
# Mean SENIORITY (days since first web order at run time) per DEPARTEMENT_MAJORITAIRE, one row per client.
df.drop_duplicates('IDIND').groupby('DEPARTEMENT_MAJORITAIRE').agg({'SENIORITY': 'mean'}).to_excel('./Vertbaudet/France/Output/'+today+'_today_senio_by_dep_maj.xlsx')

### Entry month by reorder delay

In [60]:
# MONTH_RECRUITMENT = calendar month (1-12) of the client's first web order.
df['MONTH_RECRUITMENT'] = pd.to_datetime(df['DATPREMCDEWEB']).dt.month

In [30]:
# Distinct clients per (FREQ_CAT, MONTH_RECRUITMENT), months spread across columns.
df_entry_month= df.groupby(['FREQ_CAT', 'MONTH_RECRUITMENT']).agg({'IDIND': pd.Series.nunique}).unstack()

In [31]:
# Dated export of the client counts per FREQ_CAT and recruitment month.
df_entry_month.to_excel('./Vertbaudet/France/Output/'+today+'_entry_month_by_reorder_freq.xlsx')

In [61]:
# Same count with the finer FREQ_CAT_SMALL buckets, exported straight away.
df_entry_month_small = df.groupby(['FREQ_CAT_SMALL', 'MONTH_RECRUITMENT']).agg({'IDIND': pd.Series.nunique}).unstack()
df_entry_month_small.to_excel('./Vertbaudet/France/Output/'+today+'_entry_month_by_reorder_freq_small.xlsx')

- Debiased version, only taking 2017-19 cohorts 

In [32]:
# COHORT = year of the first web order (recomputes the column already built while loading the order lines).
df['COHORT'] = df['DATPREMCDEWEB'].dt.year

In [33]:
# Same recruitment-month count restricted to cohorts before 2020.
df_entry_month_17_19= df[df['COHORT']<2020].groupby(['FREQ_CAT', 'MONTH_RECRUITMENT']).agg({'IDIND': pd.Series.nunique}).unstack()

In [34]:
# Dated export of the pre-2020 cohort version.
df_entry_month_17_19.to_excel('./Vertbaudet/France/Output/'+today+'_entry_month_by_reorder_freq_17_19.xlsx')

In [35]:
# Force a garbage-collection pass; the integer shown as cell output is gc.collect()'s return value.
gc.collect()

135

### Average nbr command by reorder delay

In [36]:
# Distinct clients and distinct orders per FREQ_CAT.
df_avg_nb_cmd = df.groupby(['FREQ_CAT']).agg({'IDIND': pd.Series.nunique, 'NUMCDE': pd.Series.nunique})

In [37]:
# AVG_NB_CMD = distinct orders divided by distinct clients in the bucket.
df_avg_nb_cmd['AVG_NB_CMD'] = df_avg_nb_cmd['NUMCDE']/df_avg_nb_cmd['IDIND']

In [38]:
# Dated export of the average number of orders per client by FREQ_CAT.
df_avg_nb_cmd.to_excel('./Vertbaudet/France/Output/'+today+'_avg_nb_cmd_by_freq_cat.xlsx')

### Avg basket by reorder delay

In [40]:
# Summed DN and distinct clients per FREQ_CAT.
df_avg_basket = df.groupby('FREQ_CAT').agg({'DN': sum, 'IDIND': pd.Series.nunique})

In [41]:
# AVERAGE_BASKET = summed DN divided by distinct clients.
# NOTE(review): the divisor is the number of clients, not of orders — confirm this is the intended "basket".
df_avg_basket['AVERAGE_BASKET'] = df_avg_basket.DN/df_avg_basket.IDIND

In [42]:
# Dated export of the AVERAGE_BASKET table by FREQ_CAT.
df_avg_basket.to_excel('./Vertbaudet/France/Output/'+today+'_avg_basket_by_freq_cat.xlsx')

### OT/TT/Recurring by reorder delay

In [43]:
# Distinct clients per (FREQ_CAT, CLIENT_CATEGORY), categories spread across columns.
df_cat_by_freq_cat = df.groupby(['FREQ_CAT', 'CLIENT_CATEGORY']).agg({'IDIND': pd.Series.nunique}).unstack()

In [44]:
# Dated export of the client-category counts by FREQ_CAT.
df_cat_by_freq_cat.to_excel('./Vertbaudet/France/Output/'+today+'_cat_client_by_freq_cat.xlsx')

In [45]:
# Same count restricted to cohorts before 2020.
df_cat_by_freq_cat_17_19 = df[df['COHORT']<2020].groupby(['FREQ_CAT', 'CLIENT_CATEGORY']).agg({'IDIND': pd.Series.nunique}).unstack()

In [46]:
# Dated export of the pre-2020 cohort version.
df_cat_by_freq_cat_17_19.to_excel('./Vertbaudet/France/Output/'+today+'_cat_client_by_freq_cat_17_19.xlsx')

In [52]:
### CAT_SMALL
# Distinct clients per (FREQ_CAT_SMALL, CLIENT_CATEGORY), exported straight away.
df_cat_by_freq_cat_SMALL = df.groupby(['FREQ_CAT_SMALL', 'CLIENT_CATEGORY']).agg({'IDIND': pd.Series.nunique}).unstack()
df_cat_by_freq_cat_SMALL.to_excel('./Vertbaudet/France/Output/'+today+'_cat_client_by_freq_cat_SMALL.xlsx')

### Acquisition channel by reorder delay

- Add LEVIER column

In [None]:
# filter on cohort 19 and 20

In [48]:
# Cohorts 2019-2020, lines whose order date equals the first web-order date:
# distinct clients per (FREQ_CAT, LEVIER).
df_acqu_chanel = df[(df['COHORT'].isin(range(2019,2021))) & (df['DATCDE']==df['DATPREMCDEWEB'])].groupby(['FREQ_CAT', 'LEVIER']).agg({'IDIND': pd.Series.nunique})

In [49]:
# Spread LEVIER across columns and write the table to a dated Excel file.
df_acqu_chanel.unstack().to_excel('./Vertbaudet/France/Output/'+today+'_acqu_channel_by_freq_cat.xlsx')

In [111]:
# Same selection, grouped by NEW_LEVIER instead of LEVIER.
df_acqu_chanel_new_levier = df[(df['COHORT'].isin(range(2019,2021))) & (df['DATCDE']==df['DATPREMCDEWEB'])].groupby(['FREQ_CAT', 'NEW_LEVIER']).agg({'IDIND': pd.Series.nunique})

In [112]:
# Spread NEW_LEVIER across columns and write the table to a dated Excel file.
df_acqu_chanel_new_levier.unstack().to_excel('./Vertbaudet/France/Output/'+today+'_acqu_channel_new_levier_by_freq_cat.xlsx')

In [50]:
# Force a garbage-collection pass; the integer shown as cell output is gc.collect()'s return value.
gc.collect()

5999

In [57]:
# Cohorts 2019-2020, first-order lines: distinct clients per (FREQ_CAT_SMALL, LEVIER), exported straight away.
df_acqu_chanel_small = df[(df['COHORT'].isin(range(2019,2021))) & (df['DATCDE']==df['DATPREMCDEWEB'])].groupby(['FREQ_CAT_SMALL', 'LEVIER']).agg({'IDIND': pd.Series.nunique}).unstack()
df_acqu_chanel_small.to_excel('./Vertbaudet/France/Output/'+today+'_acqu_channel_by_freq_cat_small.xlsx')

In [59]:
# Same selection, grouped by (FREQ_CAT_SMALL, NEW_LEVIER), exported straight away.
df_acqu_chanel_small_new_levier = df[(df['COHORT'].isin(range(2019,2021))) & (df['DATCDE']==df['DATPREMCDEWEB'])].groupby(['FREQ_CAT_SMALL', 'NEW_LEVIER']).agg({'IDIND': pd.Series.nunique}).unstack()
df_acqu_chanel_small_new_levier.to_excel('./Vertbaudet/France/Output/'+today+'_acqu_channel_new_levier_by_freq_cat_small.xlsx')

### Cat discount by reorder delay

- Discount article

In [53]:
# Summed QTE per (FREQ_CAT, line-level DISCOUNT_CAT), discount buckets spread across columns.
df_disc_art = df.groupby(['FREQ_CAT', 'DISCOUNT_CAT']).agg({'QTE': sum}).unstack()

In [54]:
# Dated export of the quantities per FREQ_CAT and article discount bucket.
df_disc_art.to_excel('./Vertbaudet/France/Output/'+today+'_discount_article_by_freq_cat.xlsx')

- Discount commande

In [58]:
# Distinct orders per (FREQ_CAT, order-level DISCOUNT_CAT_CMD), buckets spread across columns.
df_disc_cmd = df.groupby(['FREQ_CAT', 'DISCOUNT_CAT_CMD']).agg({'NUMCDE': pd.Series.nunique}).unstack() 

In [59]:
# Dated export of the order counts per FREQ_CAT and order discount bucket.
df_disc_cmd.to_excel('./Vertbaudet/France/Output/'+today+'_discount_cmd_by_freq_cat.xlsx')

- Discount premier achat

In [60]:
# Same order count restricted to lines whose order date equals the first web-order date.
df_disc_premha = df[df['DATCDE']==df['DATPREMCDEWEB']].groupby(['FREQ_CAT', 'DISCOUNT_CAT_CMD']).agg({'NUMCDE': pd.Series.nunique}).unstack() 

In [61]:
# Dated export of the first-purchase discount distribution by FREQ_CAT.
df_disc_premha.to_excel('./Vertbaudet/France/Output/'+today+'_discount_premha_by_freq_cat.xlsx')

### LTV by reorder delay

- FREQ_CAT_SMALL

In [25]:
# Clients with SENIORITY above 365 days, one row each: mean LTV_12 per FREQ_CAT_SMALL, exported straight away.
df_ltv_by_freq_cat = df[df['SENIORITY']>365].drop_duplicates('IDIND').groupby('FREQ_CAT_SMALL').agg({'LTV_12': 'mean'})
df_ltv_by_freq_cat.to_excel('./Vertbaudet/France/Output/'+today+'_ltv_12_byfreq_cat_small.xlsx')

In [28]:
# Mean LTV_24 per FREQ_CAT_SMALL, one row per client.
# NOTE(review): the filter is SENIORITY > 365 although LTV_24 covers a 730-day window — confirm the threshold.
df_ltv_24_by_freq_cat = df[df['SENIORITY']>365].drop_duplicates('IDIND').groupby('FREQ_CAT_SMALL').agg({'LTV_24': 'mean'})
df_ltv_24_by_freq_cat.to_excel('./Vertbaudet/France/Output/'+today+'_ltv_24_by_freq_cat_small.xlsx')

- FREQ_CAT

In [36]:
# Clients with SENIORITY above 365 days, one row each: mean LTV_12 per FREQ_CAT.
# Reuses (and overwrites) the variable name of the FREQ_CAT_SMALL version.
df_ltv_by_freq_cat = df[df['SENIORITY']>365].drop_duplicates('IDIND').groupby('FREQ_CAT').agg({'LTV_12': 'mean'})
df_ltv_by_freq_cat.to_excel('./Vertbaudet/France/Output/'+today+'_ltv_12_byfreq_cat.xlsx')

In [37]:
# Mean LTV_24 per FREQ_CAT, one row per client, SENIORITY > 365.
# NOTE(review): the last cell of the notebook writes to the same '_ltv_24_by_freq_cat.xlsx' name and will overwrite this file.
df_ltv_24_by_freq_cat = df[df['SENIORITY']>365].drop_duplicates('IDIND').groupby('FREQ_CAT').agg({'LTV_24': 'mean'})
df_ltv_24_by_freq_cat.to_excel('./Vertbaudet/France/Output/'+today+'_ltv_24_by_freq_cat.xlsx')

### LTV by reorder delay

In [83]:
# Mean LTV_12 over clients that are not 'One-timer' and have SENIORITY above 365 days (one row per client).
df[(df['CLIENT_CATEGORY'] != 'One-timer') & (df['SENIORITY'] > 365)].drop_duplicates('IDIND')['LTV_12'].mean()

193.05560829940842

In [82]:
# Recompute SENIORITY (days since the first web order) at the time this cell runs.
df['SENIORITY'] = (datetime.today() - df['DATPREMCDEWEB']).dt.days

In [147]:
# Clients with more than 730 days of seniority, one row each: mean LTV_24 per FREQ_CAT.
# (The original line was missing the closing parenthesis of groupby(...) and did not parse.)
df_ltv_24 = df[df.SENIORITY > 730].drop_duplicates('IDIND').groupby('FREQ_CAT').agg({'LTV_24': 'mean'})

In [145]:
# `df_ltv_12` was not defined at this point on a clean run: the per-client frame of
# that name is deleted in the LTV section and no per-FREQ_CAT version is built.
# It is rebuilt here on the model of the LTV_24 cell, with a 365-day threshold.
# NOTE(review): confirm the threshold and grouping match the cell that was lost.
df_ltv_12 = df[df.SENIORITY > 365].drop_duplicates('IDIND').groupby('FREQ_CAT').agg({'LTV_12': 'mean'})
df_ltv_12.to_excel('./Vertbaudet/France/Output/'+today+'_ltv_12_by_freq_cat.xlsx')

In [148]:
# Dated export of the mean LTV_24 per FREQ_CAT.
# NOTE(review): same file name as the earlier SENIORITY > 365 export, which this write replaces.
df_ltv_24.to_excel('./Vertbaudet/France/Output/'+today+'_ltv_24_by_freq_cat.xlsx')