In [1]:
import pandas as pd
import numpy as np
import datetime
# import local functions
from functions import preprocessing_data, vip_criteria, last_time_vip, last_month_status

In [2]:
import json

# Load the raw purchase records. The encoding is given explicitly because
# open() otherwise falls back to the platform default, while JSON files are
# UTF-8 by specification.
with open("pur_list.json", "r", encoding="utf-8") as fp:
    purchase_list = json.load(fp)

In [3]:
# Minimum monthly total a user must reach for that month to be flagged
# (compared with `>=` against the summed monthly amount further down).
threshold_amount = 100

In [4]:
# creating a DF with the purchase list
df = pd.DataFrame(purchase_list)

# change column format do date
df.date = df.date.astype('datetime64[ns]')

# generate date_month column
df['date_month'] = df.date.to_numpy().astype('datetime64[M]')

df.head(11)

Unnamed: 0,user,amount,date,date_month
0,9,16.1,2020-12-16,2020-12-01
1,8,42.96,2021-01-21,2021-01-01
2,2,44.15,2020-12-13,2020-12-01
3,1,15.96,2021-01-19,2021-01-01
4,7,47.07,2020-12-20,2020-12-01
5,3,27.49,2021-01-20,2021-01-01
6,8,25.95,2020-12-23,2020-12-01
7,7,22.4,2021-02-13,2021-02-01
8,3,38.5,2021-01-18,2021-01-01
9,5,35.45,2021-01-13,2021-01-01


## temporary columns

In [10]:
# calculates purchases per month
monthly_purchases_df = df.groupby(['user','date_month'],as_index=False).agg(monthly_amount=('amount','sum'))

# column amount_threshold: check if the monthly amount is highier than the threshold
monthly_purchases_df['amount_threshold'] = np.where(monthly_purchases_df.monthly_amount>=threshold_amount,1,0)

In [11]:
# Display the per-user monthly totals together with the threshold flag
monthly_purchases_df

Unnamed: 0,user,date_month,monthly_amount,amount_threshold
0,1,2020-12-01,99.2,0
1,1,2021-01-01,36.86,0
2,1,2021-02-01,133.32,1
3,1,2021-03-01,121.32,1
4,1,2021-05-01,121.32,1
5,2,2020-12-01,44.15,0
6,2,2021-01-01,11.19,0
7,2,2021-02-01,94.92,0
8,3,2020-12-01,40.13,0
9,3,2021-01-01,88.69,0


* user 1 : vip for 1 month
* user 2 : no vip
* user 3 : no vip
* user 4 : no vip
* user 5 : vip for 3 months
* user 6 : no vip
* user 7 : no vip
* user 8 : no vip
* user 9 : vip for 2 months
* user 10 : vip for 6 months


In [15]:
def user_check(user,df_group,user_last_month,criteria_months):
    """Tell whether `user` met the threshold in each of the last `criteria_months` months.

    Starting at `user_last_month` and walking backwards one calendar month at
    a time, the `amount_threshold` flag of every month is collected. The user
    passes only when the flags add up to `criteria_months`.

    Parameters
    ----------
    user : value compared against the `user` column of `df_group`.
    df_group : DataFrame with the columns `user`, `date_month` and
        `amount_threshold`.
    user_last_month : date-like object supporting `.replace(day=1)` and
        subtraction of a `datetime.timedelta`; it is compared for equality
        with the `date_month` column. It is used as given for the first
        month, so callers should pass the first day of the month.
    criteria_months : int, number of consecutive months to inspect.

    Returns
    -------
    bool
        True when the summed flags equal `criteria_months`; False otherwise,
        including when any inspected month has no row for the user.
    """
    # Filter the user's rows once instead of re-scanning the whole frame for
    # every inspected month.
    user_rows = df_group[df_group.user == user]

    month = user_last_month
    flags_total = 0
    for step in range(criteria_months):
        if step > 0:
            # Jump to the first day of the previous calendar month: go to day 1,
            # step back one day (last day of the previous month), go to day 1 again.
            month = (month.replace(day=1) - datetime.timedelta(days=1)).replace(day=1)

        month_flags = user_rows.loc[user_rows.date_month == month, 'amount_threshold']
        if month_flags.empty:
            # No purchases recorded for this month: the streak is broken.
            # (Previously this case was only rejected through a NaN in the sum.)
            return False
        flags_total += month_flags.max()

    # Plain bool rather than numpy.bool_ so the result prints and compares predictably.
    return bool(flags_total == criteria_months)

In [22]:
def vip_criteria(df_group,criteria_months):
    """Classify every user in `df_group` as VIP or not.

    For each distinct value of the `user` column, the user's most recent
    `date_month` is taken as the starting point and `user_check` decides
    whether the last `criteria_months` months all met the threshold.
    Progress (start/end timestamps, each user id and its result) is printed.

    NOTE: this definition shadows the `vip_criteria` name imported from
    `functions` at the top of the notebook.

    Parameters
    ----------
    df_group : DataFrame with the columns `user` and `date_month`, plus
        whatever `user_check` reads from it.
    criteria_months : int, forwarded unchanged to `user_check`.

    Returns
    -------
    (users_vip_dict, users_status_dict) : tuple of dicts
        `users_vip_dict` maps only the VIP users to True;
        `users_status_dict` maps every user to True or False.
    """
    print(str(datetime.datetime.now()) +': starting VIP criteria' )
    users_status_dict = {}
    users_vip_dict = {}
    for u in df_group.user.unique():
        print('user: '+str(u))
        # most recent month with purchases for this user, pinned to day 1
        user_last_month = df_group[df_group.user == u]['date_month'].max().replace(day=1)
        # Evaluate the check once and reuse the result for printing and
        # for both dictionaries (it used to be recomputed for the print).
        is_vip = user_check(u,df_group,user_last_month,criteria_months)
        print(is_vip)
        users_status_dict[u] = bool(is_vip)
        if is_vip:
            users_vip_dict[u] = True
    print(str(datetime.datetime.now()) +': VIP criteria ready' )
    return users_vip_dict, users_status_dict

In [23]:
# Run the VIP check requiring 3 consecutive months.
criteria_months = 3
# vip_criteria returns (dict of VIP users only, dict with the status of every user)
vip_dict, status_dict  = vip_criteria(monthly_purchases_df,criteria_months)
print(vip_dict)

user: 1
False
user: 2
False
user: 3
False
user: 4
False
user: 5
True
user: 6
False
user: 7
False
user: 8
False
user: 9
False
user: 10
True
{5: True, 10: True}


* user 1 : vip for 1 month
* user 2 : no vip
* user 3 : no vip
* user 4 : no vip
* user 5 : vip for 3 months
* user 6 : no vip
* user 7 : no vip
* user 8 : no vip
* user 9 : vip for 2 months
* user 10 : vip for 6 months


In [24]:
# Run the VIP check requiring 2 consecutive months.
criteria_months = 2
# vip_criteria returns (dict of VIP users only, dict with the status of every user)
vip_dict, status_dict  = vip_criteria(monthly_purchases_df,criteria_months)
print(vip_dict)

user: 1
False
user: 2
False
user: 3
False
user: 4
False
user: 5
True
user: 6
False
user: 7
False
user: 8
False
user: 9
True
user: 10
True
{5: True, 9: True, 10: True}


In [25]:
# Run the VIP check requiring only the most recent month.
criteria_months = 1
# vip_criteria returns (dict of VIP users only, dict with the status of every user)
vip_dict, status_dict  = vip_criteria(monthly_purchases_df,criteria_months)
print(vip_dict)

user: 1
True
user: 2
False
user: 3
False
user: 4
False
user: 5
True
user: 6
False
user: 7
False
user: 8
False
user: 9
True
user: 10
True
{1: True, 5: True, 9: True, 10: True}


In [26]:
# Run the VIP check requiring 8 consecutive months.
criteria_months = 8
# vip_criteria returns (dict of VIP users only, dict with the status of every user)
vip_dict, status_dict  = vip_criteria(monthly_purchases_df,criteria_months)
print(vip_dict)

user: 1
False
user: 2
False
user: 3
False
user: 4
False
user: 5
False
user: 6
False
user: 7
False
user: 8
False
user: 9
False
user: 10
False
{}
