# In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import math
import importlib
from dateutil.relativedelta import relativedelta
import get_started_doing_something_variables
import get_stopped_doing_something_variables
import prepare_for_the_models
import read_a_combination_of_variables
import yaml
import warnings
warnings.filterwarnings('ignore')
from IPython.display import display, Markdown

#### Helpers ####
# The parent of the current working directory is put first on the module search
# path so that the project-local `helpers` package can be imported below
# (presumably that is where `helpers` lives - confirm). The path is resolved
# against the working directory, not this file's location, and the insert must
# stay ahead of the `helpers` imports.
import os
import sys
sys.path.insert(0, os.path.abspath('../'))
from helpers.s3_bucket_utils import S3BucketUtils
from helpers import settings

# S3 helper instance; used further down to store CSV exports via store_csv_to_s3.
bucket = S3BucketUtils()
################

##### base columns #####
# Columns that every model data set starts with; the model-specific
# variables are appended to this list later on.
base_cols = [
    'spot_id',
    'time',
    'event',
]

# Report-level settings come from a YAML file under ./parameters
# (path is relative to the current working directory).
with open(r'./parameters/started_doing_something_report_parameters.yaml') as parameters_file:
    parameters = yaml.load(parameters_file, Loader=yaml.FullLoader)

### model type ###
model_type = parameters['model_type']

### name of the data directory and of the hs list file ###
churn_based_on_behaviour_dir = parameters['churn_based_on_behaviour_dir']
hs_list_filename = parameters['hs_list_filename']

### date of analysis; dashes become underscores for use in directory names ###
date_of_analysis = parameters['date_of_analysis']
date_dir = date_of_analysis.replace('-', '_')
hs_list_path = 'churn_analysis/data/' + '/'.join([date_dir, hs_list_filename])

# Data sets to process. Each name is looked up as a top-level key of
# ./parameters/data_sets.yaml; uncomment an entry to include it in the run.
data_sets = [
    # 'ALL_spots_wo_CB_cancellation_confirmed',
    # 'ALL_spots_wo_CB_cancellation_requested',
    # 'ALL_spots_with_CB_cancellation_confirmed',
    'ALL_spots_with_CB_cancellation_requested',
    # 'CAN_CANCEL_spots_wo_CB_cancellation_confirmed',
    'CAN_CANCEL_spots_wo_CB_cancellation_requested',
    # 'CAN_CANCEL_spots_with_CB_cancellation_confirmed',
    # 'CAN_CANCEL_spots_with_CB_cancellation_requested',
]
# Load the data-set definitions once, before the loop: the file is the same
# for every data set, so re-reading it on each iteration was redundant I/O.
# NOTE(review): FullLoader is kept from the original; if data_sets.yaml only
# holds plain maps/lists/scalars, yaml.safe_load would be the safer choice.
with open(r'./parameters/data_sets.yaml') as file:
    data_sets_parameters = yaml.load(file, Loader=yaml.FullLoader)

# Long event-date names as written in data_sets.yaml -> short codes that are
# passed to the data preparation step and embedded in the export file names.
event_date_short_names = {
    'cancellation_requested': 'canc_req',
    'cancellation_confirmed': 'canc_conf',
}

# For every selected data set: build the Model 0 column list, prepare the
# model data and store the resulting frames as CSV files on S3.
for data_set_name in data_sets:
    data_set_parameters = data_sets_parameters[data_set_name]
    spots_set = data_set_parameters['spots_set']
    with_wo_CB = data_set_parameters['with_wo_CB']
    event_date_full_name = data_set_parameters['event_date_type']

    with_wo_CB_boolean = with_wo_CB == 'with_CB'

    # Fail loudly on an unknown event date type. Without this check the
    # variable would be undefined on the first iteration or, worse, silently
    # keep the previous data set's value on later iterations.
    if event_date_full_name not in event_date_short_names:
        raise ValueError(
            "Unsupported event_date_type {!r} for data set {!r}; expected one of {}".format(
                event_date_full_name, data_set_name, sorted(event_date_short_names)))
    event_date = event_date_short_names[event_date_full_name]

    ##### Model 0: all variables - prepare data for the model ####
    cols_to_use = read_a_combination_of_variables.main(
        model_number=0,
        dir_name='combinations_of_variables_that_are_not_dependent/',
    )
    #### get behavioural variables ####
    (variables_to_use_for_the_model,
     did_something_last_month_vars,
     did_something_before_and_didnt_last_month_vars) = get_started_doing_something_variables.main(
        date_of_analysis=date_of_analysis,
        variables_to_use_for_the_model=cols_to_use,
    )
    # Column order: base columns, model variables, then the two derived groups.
    cols = (base_cols
            + variables_to_use_for_the_model
            + did_something_before_and_didnt_last_month_vars
            + did_something_last_month_vars)

    #### get data for the model ###
    # C=100 is forwarded unchanged; its meaning is defined by
    # get_data_for_the_MV_Cox_model, which is not visible from this script.
    (data, base_df, df_timeline_all_vars) = prepare_for_the_models.get_data_for_the_MV_Cox_model(
        date_of_analysis=date_of_analysis,
        spots_set=spots_set,
        with_wo_CB=with_wo_CB,
        event_date=event_date,
        columns=cols,
        data_dir=churn_based_on_behaviour_dir,
        C=100,
    )

    #### store the prepared frames on S3 ###
    # Both exports share the same target directory and file-name suffix.
    export_dir = '/' + churn_based_on_behaviour_dir + 'data/' + date_dir \
        + '/exports/data_used_for_each_model/'
    file_suffix = spots_set + '_spots_' + with_wo_CB + '_' + event_date + '.csv'

    bucket.store_csv_to_s3(data_frame=data,
                           file_name='data_tv_' + file_suffix,
                           dir=export_dir)

    bucket.store_csv_to_s3(data_frame=df_timeline_all_vars,
                           file_name='df_timeline_' + file_suffix,
                           dir=export_dir)