In [1]:
import pandas as pd
import yaml
import os
import sys
from dateutil.relativedelta import relativedelta
sys.path.insert(0, os.path.abspath('../'))
from helpers.s3_bucket_utils import S3BucketUtils
import re

bucket = S3BucketUtils()

from dict_of_vars import variable_map
import read_a_combination_of_variables

def add_month(date, m):
    ddd = pd.to_datetime(date, format='%Y-%m-%d')
    ddd2 = ddd + relativedelta(months=m)
    return (str(ddd2))[0:10]

In [2]:
with open(r'./parameters/started_doing_something_report_parameters.yaml') as file:
    model_params = yaml.load(file, Loader=yaml.FullLoader)

date_of_analysis = '2022-09-01'# model_params['date_of_analysis']

date_dir = date_of_analysis.replace('-', '_')

In [3]:
previous_date_of_analysis = add_month(date_of_analysis, -1)

In [4]:
df = pd.read_csv('data/' + date_dir + 
                 '/exports/important_variables/important_variables_sorted_by_variable_name.csv')

In [5]:
df.columns

Index(['model', 'variable', 'interpretation', 'important_variables_group',
       'exp(coef) - AVERAGE', 'p value - AVERAGE',
       'ALL_spots_with_CB_cancellation_requested_exp(coef)_and_p_value',
       'CAN_CANCEL_spots_wo_CB_cancellation_requested_exp(coef)_and_p_value',
       '%_of_ALL_spots_with_CB_canc_req',
       'last_month_%_of_ALL_spots_with_CB_canc_req',
       '%_of_CAN_CANCEL_spots_wo_CB_canc_req',
       'last_month_%_of_CAN_CANCEL_spots_wo_CB_canc_req'],
      dtype='object')

In [6]:
all_spots_sets = ['ALL_spots_with_CB_canc_req',\
                     'CAN_CANCEL_spots_wo_CB_canc_req']

In [7]:
df_ = bucket.load_csv_from_s3(file_name = 'churn_analysis_based_on_behaviour/data/' + date_dir + \
                    '/exports/data_used_for_each_model/data_tv_'+all_spots_sets[0]+'.csv')
df_c = bucket.load_csv_from_s3(file_name = 'churn_analysis_based_on_behaviour/data/' + date_dir + \
                    '/exports/data_used_for_each_model/data_tv_'+all_spots_sets[1]+'.csv')

In [8]:
sorted(df_.columns.tolist())

['Add-Ons',
 'Added.events.manually.Edited.events.total',
 'Added.food.Edited.food.total',
 'Added.specials.Edited.specials.total',
 'Annual Commitment with 2 semi-annual payments',
 'Annual Prepay',
 'Basicinfo.page.views.total',
 'Bday.club.downloads',
 'C.Changed.inquiry.status.total',
 'Changed.picture.or.text.total',
 'Clicked.emails.total',
 'Consumer.job.listings.inquiry.total',
 'Contract type',
 'Date Cancellation Confirmed',
 'Date Cancellation Requested',
 'Delinquent ',
 'Downloaded.qrcode.flyers.total',
 'Downloaded.regular.flyers.total',
 'Emails.collected.total',
 'Emails.sent.manually.Scheduled.emails.sent.total',
 'FB.Posts.disliked.total',
 'FB.Posts.liked.total',
 'Fb.page.likes.analysis.HS',
 'Fb.page.likes.analysis.avg',
 'Fb.page.likes.analysis.avg.div10',
 'Fb.page.likes.analysis.avg.div100',
 'Fb.page.likes.analysis.avg.div1000',
 'Fb.page.likes.analysis.avg.log2',
 'Fb.page.likes.analysis.last.month',
 'Fb.page.likes.analysis.last.month.div10',
 'Fb.page.likes.

In [9]:
def get_last_month_data(df, last_month_date, customers=False, cancelled=False):
    
    df_ = df.copy()
    df_ = df_[df_['right_limit']==last_month_date]
    
    if customers:
        df_ = df_[df_['event']==False]
    if cancelled:
        df_ = df_[df_['event']==True]
    
    return(df_)

def get_stopped_last_month_data(df, last_month_date, var_had_and_didnt, customers=False, cancelled=False):
    
    df_ = get_last_month_data(df, last_month_date, customers, cancelled)
    
    df_ = df_[(df_[var_had_and_didnt]\
               .map(lambda x: x==1))]
    return(df_)

def get_never_did_last_month_data(df, last_month_date, var_had_and_didnt, var_had_last, customers=False, cancelled=False):
    
    df_ = get_last_month_data(df, last_month_date, customers, cancelled)
    
    df_ = df_[(df_[[var_had_and_didnt, var_had_last]]\
               .apply(lambda x: x[0]==0 and x[1]==0, axis=1))]
    return(df_)

def get_started_last_month_data(df, last_month_date, var_had_last, period, customers=False, cancelled=False):
    
    df_ = get_last_month_data(df, last_month_date, customers, cancelled)
    
    df_ = df_[df_[var_had_last].map(lambda x: x==1)]
    
    df_ = df_[df_['time'] <= period]
    
    return(df_)

def get_continued_last_month_data(df, last_month_date, var_had_last, period, customers=False, cancelled=False):
    
    df_ = get_last_month_data(df, last_month_date, customers, cancelled)
    
    df_ = df_[df_[var_had_last].map(lambda x: x==1)]
    
    df_ = df_[df_['time'] > period]
    
    return(df_)

In [10]:
model_numbers = \
read_a_combination_of_variables.\
get_a_list_of_model_numbers(dir_name='combinations_of_variables_that_are_not_dependent/')

variables_coefficients = dict()
for k,v in variable_map.items():
    print(k)
    variables_coefficients[k] = {'coef_mean': None, 'ALL':{'coefs':[], 'n_non0':[], 'p':[]}, 'CAN_CANCEL':{'coefs':[], 'n_non0':[], 'p':[]}}
    for i in [n for n in model_numbers if n!=0]:
        path = 'data/' + date_dir + \
        '/exports/stopped_doing_something/model_'+str(i) + \
        '/significant_variables_sorted_by_variable_name.csv'
        df_mod = pd.read_csv(path)
        variables_coefficients[k]['ALL']['n_non0'].append(df_[df_[k]==1]['spot_id'].nunique())
        if k in df_mod.variable.tolist():
            set = False
            if float(df_mod.loc[df_mod.variable==k, 'p value'].iloc[0]) <= .05:
                variables_coefficients[k]['ALL']['p'].append(float(df_mod.loc[df_mod.variable==k, 'p value'].iloc[0]))
                variables_coefficients[k]['ALL']['coefs'].append(float(df_mod.loc[df_mod.variable==k, 'exp(coef)'].iloc[0]))
                set = True
            if set == False and float(df_mod.loc[df_mod.variable==k, 'p value'].iloc[0]) <= .2:
                variables_coefficients[k]['ALL']['p'].append(float(df_mod.loc[df_mod.variable==k, 'p value'].iloc[0]))
                variables_coefficients[k]['ALL']['coefs'].append(float(df_mod.loc[df_mod.variable==k, 'exp(coef)'].iloc[0]))
                set = True
            if set == False and float(df_mod.loc[df_mod.variable==k, 'p value'].iloc[0]) <= .4:
                variables_coefficients[k]['ALL']['p'].append(float(df_mod.loc[df_mod.variable==k, 'p value'].iloc[0]))
                variables_coefficients[k]['ALL']['coefs'].append(float(df_mod.loc[df_mod.variable==k, 'exp(coef)'].iloc[0]))
        variables_coefficients[k]['CAN_CANCEL']['n_non0'].append(df_c[df_c[k]==1]['spot_id'].nunique())
        if k in df_mod.variable.tolist():
            set = False
            if float(df_mod.loc[df_mod.variable==k, 'p value'].iloc[0]) <= .05:
                variables_coefficients[k]['CAN_CANCEL']['p'].append(float(df_mod.loc[df_mod.variable==k, 'p value.1'].iloc[0]))
                variables_coefficients[k]['CAN_CANCEL']['coefs'].append(float(df_mod.loc[df_mod.variable==k, 'exp(coef).1'].iloc[0]))
                set = True
            if set == False and float(df_mod.loc[df_mod.variable==k, 'p value'].iloc[0]) <= .2:
                variables_coefficients[k]['CAN_CANCEL']['p'].append(float(df_mod.loc[df_mod.variable==k, 'p value.1'].iloc[0]))
                variables_coefficients[k]['CAN_CANCEL']['coefs'].append(float(df_mod.loc[df_mod.variable==k, 'exp(coef).1'].iloc[0]))
                set = True
            if set == False and float(df_mod.loc[df_mod.variable==k, 'p value'].iloc[0]) <= .4:
                variables_coefficients[k]['CAN_CANCEL']['p'].append(float(df_mod.loc[df_mod.variable==k, 'p value.1'].iloc[0]))
                variables_coefficients[k]['CAN_CANCEL']['coefs'].append(float(df_mod.loc[df_mod.variable==k, 'exp(coef).1'].iloc[0]))

had_added_food_edited_food_before_and_didnt_last_4_months
had_added_food_edited_food_last_4_months
had_added_specials_edited_specials_last_4_months
had_catering_submissions_last_4_months
had_changed_picture_or_text_last_4_months
had_clicked_emails_before_and_didnt_last_4_months
had_clicked_emails_last_4_months
had_downloaded_qrcode_flyers_before_and_didnt_last_3_months
had_downloaded_qrcode_flyers_last_3_months
had_emails_sent_manually_scheduled_emails_sent_last_3_months
had_online_orders_before_and_didnt_last_2_months
had_online_orders_last_2_months
had_other_non_contactless_menu_qr_flyer_scans_last_2_months
had_posts_liked_before_and_didnt_last_4_months
had_posts_liked_last_4_months
had_posts_on_facebook_before_and_didnt_last_2_months
had_posts_on_facebook_last_2_months
had_posts_seen_before_and_didnt_last_4_months
had_posts_seen_last_4_months
had_preview_page_views_email_before_and_didnt_last_4_months
had_preview_page_views_email_last_4_months
had_properly_used_catering_inquiries_la

In [11]:
for k,v in  variables_coefficients.copy().items():
    print(k)
    p_all = pd.Series(variables_coefficients[k]['ALL']['p'])
    c_all = pd.Series(variables_coefficients[k]['ALL']['coefs'])
    p_cc = pd.Series(variables_coefficients[k]['CAN_CANCEL']['p'])
    c_cc = pd.Series(variables_coefficients[k]['CAN_CANCEL']['coefs'])
    # df_mean = pd.DataFrame([(i.mean()) for i in [p_all, c_all, p_cc, c_cc]], columns=['mean'])
    # df_mean.index = ['all_p', 'all_c', 'cc_p', 'cc_c']
    # print(df_mean)
    s_mean = pd.Series([i.mean() for i in [c_all, c_cc]])
    print(s_mean[s_mean.notnull()].mean())
    variables_coefficients[k]['coef_mean'] = s_mean[s_mean.notnull()].mean()

had_added_food_edited_food_before_and_didnt_last_4_months


  p_all = pd.Series(variables_coefficients[k]['ALL']['p'])
  c_all = pd.Series(variables_coefficients[k]['ALL']['coefs'])
  p_cc = pd.Series(variables_coefficients[k]['CAN_CANCEL']['p'])
  c_cc = pd.Series(variables_coefficients[k]['CAN_CANCEL']['coefs'])


nan
had_added_food_edited_food_last_4_months
0.8033333333333333
had_added_specials_edited_specials_last_4_months
nan
had_catering_submissions_last_4_months
0.7396190476190476
had_changed_picture_or_text_last_4_months
0.725
had_clicked_emails_before_and_didnt_last_4_months
nan
had_clicked_emails_last_4_months
0.6681707317073171
had_downloaded_qrcode_flyers_before_and_didnt_last_3_months
nan
had_downloaded_qrcode_flyers_last_3_months
nan
had_emails_sent_manually_scheduled_emails_sent_last_3_months
0.7366666666666666
had_online_orders_before_and_didnt_last_2_months
nan
had_online_orders_last_2_months
nan
had_other_non_contactless_menu_qr_flyer_scans_last_2_months
nan
had_posts_liked_before_and_didnt_last_4_months
nan
had_posts_liked_last_4_months
0.6570588235294117
had_posts_on_facebook_before_and_didnt_last_2_months
nan
had_posts_on_facebook_last_2_months
0.7388571428571429
had_posts_seen_before_and_didnt_last_4_months
nan
had_posts_seen_last_4_months
nan
had_preview_page_views_email_bef

In [12]:
df_calc_ = pd.DataFrame()
df_calc_['variable'] = pd.Series([k for k,v in variables_coefficients.items()])
df_calc_['coef_value'] = pd.Series([v['coef_mean'] for k,v in variables_coefficients.items()])
df_calc_

Unnamed: 0,variable,coef_value
0,had_added_food_edited_food_before_and_didnt_la...,
1,had_added_food_edited_food_last_4_months,0.803333
2,had_added_specials_edited_specials_last_4_months,
3,had_catering_submissions_last_4_months,0.739619
4,had_changed_picture_or_text_last_4_months,0.725
5,had_clicked_emails_before_and_didnt_last_4_months,
6,had_clicked_emails_last_4_months,0.668171
7,had_downloaded_qrcode_flyers_before_and_didnt_...,
8,had_downloaded_qrcode_flyers_last_3_months,
9,had_emails_sent_manually_scheduled_emails_sent...,0.736667


In [13]:
variables_coefficients = dict()
for k,v in variable_map.items():
    print(k)
    variables_coefficients[k] = {'coef_mean': None, 'ALL':{'coefs':[], 'n_non0':[], 'p':[]}, 'CAN_CANCEL':{'coefs':[], 'n_non0':[], 'p':[]}}
    for i in [n for n in model_numbers if n!=0]:
        path = 'data/' + date_dir + \
        '/exports/coefficients_and_pvalues/started_doing_something/model_'+str(i) + \
        '/coef_and_pvalues_ALL_spots_with_CB_cancellation_requested_p_below_0_2.csv'
        df_mod = pd.read_csv(path)
        variables_coefficients[k]['ALL']['n_non0'].append(df_[df_[k]==1]['spot_id'].nunique())
        if k in df_mod.covariate.tolist():
            set = False
            if df_mod.loc[df_mod.covariate==k]['p'].iloc[0] <= .05:
                variables_coefficients[k]['ALL']['p'].append(df_mod.loc[df_mod.covariate==k]['p'].iloc[0])
                variables_coefficients[k]['ALL']['coefs'].append(df_mod.loc[df_mod.covariate==k]['exp(coef)'].iloc[0])
                set = True
            if set == False and df_mod.loc[df_mod.covariate==k]['p'].iloc[0] <= .2:
                variables_coefficients[k]['ALL']['p'].append(df_mod.loc[df_mod.covariate==k]['p'].iloc[0])
                variables_coefficients[k]['ALL']['coefs'].append(df_mod.loc[df_mod.covariate==k]['exp(coef)'].iloc[0])
                set = True
            if set == False and df_mod.loc[df_mod.covariate==k]['p'].iloc[0] <= .4:
                variables_coefficients[k]['ALL']['p'].append(df_mod.loc[df_mod.covariate==k]['p'].iloc[0])
                variables_coefficients[k]['ALL']['coefs'].append(df_mod.loc[df_mod.covariate==k]['exp(coef)'].iloc[0])
        path = 'data/' + date_dir + \
        '/exports/coefficients_and_pvalues/started_doing_something/model_'+str(i) + \
        '/coef_and_pvalues_CAN_CANCEL_spots_wo_CB_cancellation_requested_p_below_0_2.csv'
        df_mod = pd.read_csv(path)
        variables_coefficients[k]['CAN_CANCEL']['n_non0'].append(df_c[df_c[k]==1]['spot_id'].nunique())
        if k in df_mod.covariate.tolist():
            set = False
            if df_mod.loc[df_mod.covariate==k]['p'].iloc[0] <= .05:
                variables_coefficients[k]['CAN_CANCEL']['p'].append(df_mod.loc[df_mod.covariate==k]['p'].iloc[0])
                variables_coefficients[k]['CAN_CANCEL']['coefs'].append(df_mod.loc[df_mod.covariate==k]['exp(coef)'].iloc[0])
                set = True
            if set == False and df_mod.loc[df_mod.covariate==k]['p'].iloc[0] <= .2:
                variables_coefficients[k]['CAN_CANCEL']['p'].append(df_mod.loc[df_mod.covariate==k]['p'].iloc[0])
                variables_coefficients[k]['CAN_CANCEL']['coefs'].append(df_mod.loc[df_mod.covariate==k]['exp(coef)'].iloc[0])
                set = True
            if set == False and df_mod.loc[df_mod.covariate==k]['p'].iloc[0] <= .4:
                variables_coefficients[k]['CAN_CANCEL']['p'].append(df_mod.loc[df_mod.covariate==k]['p'].iloc[0])
                variables_coefficients[k]['CAN_CANCEL']['coefs'].append(df_mod.loc[df_mod.covariate==k]['exp(coef)'].iloc[0])

had_added_food_edited_food_before_and_didnt_last_4_months
had_added_food_edited_food_last_4_months
had_added_specials_edited_specials_last_4_months
had_catering_submissions_last_4_months
had_changed_picture_or_text_last_4_months
had_clicked_emails_before_and_didnt_last_4_months
had_clicked_emails_last_4_months
had_downloaded_qrcode_flyers_before_and_didnt_last_3_months
had_downloaded_qrcode_flyers_last_3_months
had_emails_sent_manually_scheduled_emails_sent_last_3_months
had_online_orders_before_and_didnt_last_2_months
had_online_orders_last_2_months
had_other_non_contactless_menu_qr_flyer_scans_last_2_months
had_posts_liked_before_and_didnt_last_4_months
had_posts_liked_last_4_months
had_posts_on_facebook_before_and_didnt_last_2_months
had_posts_on_facebook_last_2_months
had_posts_seen_before_and_didnt_last_4_months
had_posts_seen_last_4_months
had_preview_page_views_email_before_and_didnt_last_4_months
had_preview_page_views_email_last_4_months
had_properly_used_catering_inquiries_la

In [14]:
for k,v in  variables_coefficients.copy().items():
    print(k)
    p_all = pd.Series(variables_coefficients[k]['ALL']['p'])
    c_all = pd.Series(variables_coefficients[k]['ALL']['coefs'])
    p_cc = pd.Series(variables_coefficients[k]['CAN_CANCEL']['p'])
    c_cc = pd.Series(variables_coefficients[k]['CAN_CANCEL']['coefs'])
    # df_mean = pd.DataFrame([(i.mean()) for i in [p_all, c_all, p_cc, c_cc]], columns=['mean'])
    # df_mean.index = ['all_p', 'all_c', 'cc_p', 'cc_c']
    # print(df_mean)
    s_mean = pd.Series([i.mean() for i in [c_all, c_cc]])
    print(s_mean[s_mean.notnull()].mean())
    variables_coefficients[k]['coef_mean'] = s_mean[s_mean.notnull()].mean()

had_added_food_edited_food_before_and_didnt_last_4_months
nan
had_added_food_edited_food_last_4_months
nan
had_added_specials_edited_specials_last_4_months
0.7039377703533377
had_catering_submissions_last_4_months
0.7362431111236043
had_changed_picture_or_text_last_4_months
0.7513655343111538
had_clicked_emails_before_and_didnt_last_4_months
1.6273123617201022
had_clicked_emails_last_4_months
1.0086698853452047
had_downloaded_qrcode_flyers_before_and_didnt_last_3_months
nan
had_downloaded_qrcode_flyers_last_3_months
nan
had_emails_sent_manually_scheduled_emails_sent_last_3_months
0.7258682163363028
had_online_orders_before_and_didnt_last_2_months
1.7543159522337115
had_online_orders_last_2_months
1.2469610792579122
had_other_non_contactless_menu_qr_flyer_scans_last_2_months
0.7029090268606598
had_posts_liked_before_and_didnt_last_4_months
1.1965118107272725
had_posts_liked_last_4_months
0.740054233842495
had_posts_on_facebook_before_and_didnt_last_2_months
1.4912373352845147
had_posts_

  p_all = pd.Series(variables_coefficients[k]['ALL']['p'])
  c_all = pd.Series(variables_coefficients[k]['ALL']['coefs'])
  p_cc = pd.Series(variables_coefficients[k]['CAN_CANCEL']['p'])
  c_cc = pd.Series(variables_coefficients[k]['CAN_CANCEL']['coefs'])


1.2091821448335098
had_posts_seen_before_and_didnt_last_4_months
1.4051483881664106
had_posts_seen_last_4_months
1.1679070677394057
had_preview_page_views_email_before_and_didnt_last_4_months
nan
had_preview_page_views_email_last_4_months
nan
had_properly_used_catering_inquiries_last_4_months
0.6697731130802644
had_properly_used_online_orders_inquiries_before_and_didnt_last_2_months
1.7169548925565437
had_properly_used_online_orders_inquiries_last_2_months
nan
had_properly_used_private_parties_inquiries_before_and_didnt_last_3_months
1.5370657342196548
had_properly_used_private_parties_inquiries_last_3_months
0.8616956753677198
had_properly_used_reservations_inquiries_last_2_months
0.4840252959154454
had_qr_code_menu_scans_last_2_months
0.6715682338508253
had_tickets_before_and_didnt_last_4_months
0.6216393685968079
had_tickets_last_4_months
0.8028941576056399
had_visited_admin_before_and_didnt_last_2_months
1.2746474392458045
had_visited_admin_last_2_months
0.8307982038396311
had_visi

In [15]:
df_calc = pd.DataFrame()
df_calc['variable'] = pd.Series([k for k,v in variables_coefficients.items()])
df_calc['coef_value'] = pd.Series([v['coef_mean'] for k,v in variables_coefficients.items()])
df_calc

Unnamed: 0,variable,coef_value
0,had_added_food_edited_food_before_and_didnt_la...,
1,had_added_food_edited_food_last_4_months,
2,had_added_specials_edited_specials_last_4_months,0.703938
3,had_catering_submissions_last_4_months,0.736243
4,had_changed_picture_or_text_last_4_months,0.751366
5,had_clicked_emails_before_and_didnt_last_4_months,1.627312
6,had_clicked_emails_last_4_months,1.00867
7,had_downloaded_qrcode_flyers_before_and_didnt_...,
8,had_downloaded_qrcode_flyers_last_3_months,
9,had_emails_sent_manually_scheduled_emails_sent...,0.725868


In [16]:
df_calc[df_calc_['coef_value'].isnull()]

Unnamed: 0,variable,coef_value
0,had_added_food_edited_food_before_and_didnt_la...,
2,had_added_specials_edited_specials_last_4_months,0.703938
5,had_clicked_emails_before_and_didnt_last_4_months,1.627312
7,had_downloaded_qrcode_flyers_before_and_didnt_...,
8,had_downloaded_qrcode_flyers_last_3_months,
10,had_online_orders_before_and_didnt_last_2_months,1.754316
11,had_online_orders_last_2_months,1.246961
12,had_other_non_contactless_menu_qr_flyer_scans_...,0.702909
13,had_posts_liked_before_and_didnt_last_4_months,1.196512
15,had_posts_on_facebook_before_and_didnt_last_2_...,1.491237


In [17]:
df_calc.loc[df_calc['coef_value'].isnull(), 'coef_value'] = df_calc_.loc[df_calc['coef_value'].isnull(), 'coef_value']

In [18]:
df_calc

Unnamed: 0,variable,coef_value
0,had_added_food_edited_food_before_and_didnt_la...,
1,had_added_food_edited_food_last_4_months,0.803333
2,had_added_specials_edited_specials_last_4_months,0.703938
3,had_catering_submissions_last_4_months,0.736243
4,had_changed_picture_or_text_last_4_months,0.751366
5,had_clicked_emails_before_and_didnt_last_4_months,1.627312
6,had_clicked_emails_last_4_months,1.00867
7,had_downloaded_qrcode_flyers_before_and_didnt_...,
8,had_downloaded_qrcode_flyers_last_3_months,
9,had_emails_sent_manually_scheduled_emails_sent...,0.725868


In [19]:
variables_ = variable_map.keys()
spots_never_did_customers_ = []
spots_customers_ = []
Not_Doing_ = [] 
Percent_Not_Doing_ = []
Continue_ = []
Stopped_Doing_ = []
Percent_Who_Quit_ = []
Coef_Continue_ = []
Continue_Churn_Imrovement_ = []

for k,v in variable_map.items():
#     print(k)
#     print(v)
#     Not_Doing = spots_never_did_customers
    var_had_and_didnt = v['variables'][1]
    var_have = v['variables'][0]
    months = v['months']
    print(var_had_and_didnt)
    print(var_have)
    
    spots_never_did_customers = get_never_did_last_month_data(df_, previous_date_of_analysis, 
                              var_had_and_didnt, var_have, customers=True).shape[0]
    spots_customers = get_last_month_data(df_, previous_date_of_analysis, customers=True).shape[0]
    Not_Doing = spots_never_did_customers
    Percent_Not_Doing = Not_Doing/spots_customers
    
    Continue = get_continued_last_month_data(df_, previous_date_of_analysis, var_have, 
                                             months, customers=True).shape[0]
    
    Stopped_Doing = get_stopped_last_month_data(df_, previous_date_of_analysis, var_had_and_didnt, 
                                                customers=True).shape[0]
    
    Percent_Who_Quit = Stopped_Doing/(Continue+Stopped_Doing) # spots_customers
    
    Coef_Continue = variables_coefficients[var_have]['coef_mean']
    
    Continue_Churn_Imrovement = (1-Coef_Continue) * Percent_Not_Doing * (1-Percent_Who_Quit)
    
    print(spots_never_did_customers, spots_customers, Not_Doing, 
          Percent_Not_Doing, Continue, Stopped_Doing, Percent_Who_Quit, 
          Coef_Continue, Continue_Churn_Imrovement)
    spots_never_did_customers_.append(spots_never_did_customers)
    spots_customers_.append(spots_customers)
    Not_Doing_.append(Not_Doing)
    Percent_Not_Doing_.append(Percent_Not_Doing)
    Continue_.append(Continue)
    Stopped_Doing_.append(Stopped_Doing)
    Percent_Who_Quit_.append(Percent_Who_Quit)
    Coef_Continue_.append(Coef_Continue)
    Continue_Churn_Imrovement_.append(Continue_Churn_Imrovement)
    print('------------------------------------------------------')
    

had_added_food_edited_food_before_and_didnt_last_4_months
had_added_food_edited_food_last_4_months
1827 3839 1827 0.4759051836415733 1285 566 0.30578065910318747 nan nan
------------------------------------------------------
had_added_food_edited_food_before_and_didnt_last_4_months
had_added_food_edited_food_last_4_months
1827 3839 1827 0.4759051836415733 1285 566 0.30578065910318747 nan nan
------------------------------------------------------
had_added_specials_edited_specials_before_and_didnt_last_4_months
had_added_specials_edited_specials_last_4_months
2544 3839 2544 0.6626725709820266 559 641 0.5341666666666667 0.7039377703533377 0.09139292188321609
------------------------------------------------------
had_catering_submissions_before_and_didnt_last_4_months
had_catering_submissions_last_4_months
2569 3839 2569 0.6691846835113311 1090 105 0.08786610878661087 0.7362431111236043 0.16099352010484164
------------------------------------------------------
had_changed_picture_or_text_

1157 3839 1157 0.30138056785621253 1988 404 0.1688963210702341 0.8376760729244761 0.040658653557491466
------------------------------------------------------
had_visited_qrcode_flyers_page_before_and_didnt_last_4_months
had_visited_qrcode_flyers_page_last_4_months
1756 3839 1756 0.4574107840583485 819 1026 0.5560975609756098 0.665768283312089 0.06786433382659536
------------------------------------------------------
had_visited_qrcode_flyers_page_before_and_didnt_last_4_months
had_visited_qrcode_flyers_page_last_4_months
1756 3839 1756 0.4574107840583485 819 1026 0.5560975609756098 0.665768283312089 0.06786433382659536
------------------------------------------------------
had_visited_special_page_before_and_didnt_last_4_months
had_visited_special_page_last_4_months
1321 3839 1321 0.34410002604845014 1461 809 0.3563876651982379 0.6875361258887464 0.06920044342277913
------------------------------------------------------
had_number_of_requests_for_new_text_fragment_before_and_didnt_last

In [20]:
df_max_imp = pd.DataFrame()
df_max_imp['variables_'] = variables_
df_max_imp['spots_never_did_customers'] = spots_never_did_customers_
df_max_imp['spots_customers'] = spots_customers_
df_max_imp['Not_Doing'] = Not_Doing_
df_max_imp['Percent_Not_Doing'] = Percent_Not_Doing_
df_max_imp['Continue'] = Continue_
df_max_imp['Stopped_Doing'] = Stopped_Doing_
df_max_imp['Percent_Who_Quit'] = Percent_Who_Quit_
df_max_imp['Coef_Continue'] = Coef_Continue_
df_max_imp['Churn_Imrovement'] = Continue_Churn_Imrovement_

In [22]:
df_max_imp['variables'] = df_max_imp['variables_'].map(lambda x: x[4:-31] if re.search('didnt_last_', x) else x[4:-14])

In [25]:
df_max_imp = df_max_imp[['variables', 'spots_never_did_customers', 'spots_customers',
       'Not_Doing', 'Percent_Not_Doing', 'Continue', 'Stopped_Doing',
       'Percent_Who_Quit', 'Coef_Continue', 'Churn_Imrovement']]

In [28]:
df_max_imp.drop_duplicates()[['variables', 'Churn_Imrovement']]

Unnamed: 0,variables,Churn_Imrovement
0,added_food_edited_food,
2,added_specials_edited_specials,0.091393
3,catering_submissions,0.160994
4,changed_picture_or_text,0.078205
5,clicked_emails,-0.00202
7,downloaded_qrcode_flyers,
9,emails_sent_manually_scheduled_emails_sent,0.093907
10,online_orders,-0.141614
12,other_non_contactless_menu_qr_flyer_scans,0.091805
13,posts_liked,0.092932
