In [1]:
#imports
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.preprocessing import TransactionEncoder

In [2]:
#read files
# Customers come from a plain CSV; articles and transactions are read straight
# out of their zip archives without extracting them to disk.
customers = pd.read_csv('customers_mod.csv')

# Each archive member is opened in its own `with` block so its handle is
# closed as soon as pandas has finished reading, instead of staying open
# after the surrounding ZipFile block exits.
with zipfile.ZipFile('articles.csv.zip', 'r') as z:
    with z.open('articles.csv') as f:
        articles = pd.read_csv(f)

with zipfile.ZipFile('transactions_train.csv.zip', 'r') as z:
    with z.open('transactions_train.csv') as f:
        transactions = pd.read_csv(f)

In [3]:
def select_columns(df):
    """Return ``df`` with its last column removed (positional slice)."""
    n_keep = df.shape[1] - 1
    return df.iloc[:, :n_keep]

def tweak_data(df):
    """Return a copy of ``df`` whose ``age`` column is cast to nullable ``Int8``.

    The input frame is not modified; every other column is passed through.
    """
    age_as_int8 = df.age.astype('Int8')
    return df.assign(age=age_as_int8)

In [4]:
def tweak_data_trns(df):
    """Tidy the transactions frame.

    * ``price`` is downcast to ``float32``.
    * ``sales_channel_id`` is replaced by a categorical ``sales_channel``
      column: ``'store'`` where the id equals 1, ``'online'`` otherwise.

    Returns a new frame; ``df`` itself is left untouched.
    """
    def _label_channel(frame):
        # Start from the all-'store' placeholder column and overwrite every
        # row whose channel id is not 1.
        is_store = frame.sales_channel_id == 1
        return frame.sales_channel.where(is_store, 'online').astype('category')

    priced = df.assign(price=df.price.astype('float32'), sales_channel='store')
    labelled = priced.assign(sales_channel=_label_channel)
    return labelled.drop(columns=['sales_channel_id'])

In [5]:
def tweak_data_arts(df):
    """Reduce the articles frame to what the basket analysis needs.

    Keeps ``article_id`` and ``product_type_name`` and adds ``product_type``,
    a categorical copy of ``product_type_name``.
    """
    kept = df.loc[:, ['article_id', 'product_type_name']]
    type_as_category = df['product_type_name'].astype('category')
    return kept.assign(product_type=type_as_category)

In [6]:
def merge_dfs(customers, transactions, articles):
    """Join the three cleaned tables into one row per purchased article.

    Customers are joined to transactions on ``customer_id``, and the result is
    joined to articles on ``article_id``. Both joins use pandas' default
    (inner) merge.
    """
    customer_purchases = tweak_data(customers).merge(
        tweak_data_trns(transactions), on='customer_id'
    )
    return customer_purchases.merge(tweak_data_arts(articles), on='article_id')

In [7]:
# Build the merged frame; article_id is only needed as the join key, so it is
# dropped right after the merge.
df = (merge_dfs(customers, transactions, articles)
            .drop(columns=['article_id']))

In [8]:
# Preview the first rows of the merged customer/transaction/article frame.
df.head()

Unnamed: 0.1,Unnamed: 0,customer_id,Customer_segment,age,t_dat,price,sales_channel,product_type_name,product_type
0,0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,High value Customer,49,2018-12-27,0.044051,store,Jacket,Jacket
1,10612,0209bfc5582b5c855952e5b43dc9c17bef921ca0f46b42...,Lost Customers,44,2018-10-03,0.061,store,Jacket,Jacket
2,30006,05d20eff33ec820d4330ee25cce4d0c35e8a305137caf0...,Medium Value Customer,41,2019-12-17,0.033881,online,Jacket,Jacket
3,30816,05fa492d5d182adb9dca7cc20be545d2dc8990606d2197...,Medium Value Customer,41,2018-11-22,0.050831,store,Jacket,Jacket
4,31188,060de711fa176fb5249bb39f0eceea040bde903f8b0434...,Lost Customers,45,2018-09-24,0.06778,online,Jacket,Jacket


In [9]:
# (rows, columns) of the merged frame.
df.shape

(31471572, 9)

In [10]:
# Narrow the merged frame to the customer, segment, age, purchase-date and
# product-type columns; price and sales channel are not used below.
new_df = df[['customer_id','Customer_segment','age','t_dat','product_type']]
new_df

Unnamed: 0,customer_id,Customer_segment,age,t_dat,product_type
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,High value Customer,49,2018-12-27,Jacket
1,0209bfc5582b5c855952e5b43dc9c17bef921ca0f46b42...,Lost Customers,44,2018-10-03,Jacket
2,05d20eff33ec820d4330ee25cce4d0c35e8a305137caf0...,Medium Value Customer,41,2019-12-17,Jacket
3,05fa492d5d182adb9dca7cc20be545d2dc8990606d2197...,Medium Value Customer,41,2018-11-22,Jacket
4,060de711fa176fb5249bb39f0eceea040bde903f8b0434...,Lost Customers,45,2018-09-24,Jacket
...,...,...,...,...,...
31471567,ffe6e53e0ac98cd3da1697e057f24f002482ef00d0573b...,Top Customers,22,2018-09-26,Bag
31471568,fff22c025716b0f38fadb92a2bd09881c98e11368589c1...,Low Value Customers,41,2018-11-20,Dress
31471569,fff3e75605ec575be9b95eda1e6557299e81bba12668d7...,Top Customers,33,2018-10-17,Trousers
31471570,fff4381593e170ca0aea188998487c830d9a4070c9ec4b...,High value Customer,27,2019-03-18,Vest top


In [11]:
# One basket per (customer, purchase date) for the 'Top Customers' segment:
# all product types bought that day, joined into one comma-separated string.
#
# The frame is built explicitly (grouped Series -> to_frame(name=0)) instead of
# relying on what groupby(as_index=False)[col].apply(...) returns, because that
# return shape is not the same in every pandas release. Later cells need exactly
# this layout: index levels (customer_id, t_dat) and a single column labelled 0.
test = (
    new_df[new_df['Customer_segment'] == 'Top Customers']
    .groupby(by=['customer_id', 't_dat'])['product_type']
    .apply(lambda x: ','.join(x.astype(str)))
    .to_frame(name=0)
)
test

Unnamed: 0_level_0,Unnamed: 1_level_0,0
customer_id,t_dat,Unnamed: 2_level_1
00009d946eec3ea54add5ba56d5210ea898def4b46c68570cf0096d962cacc75,2018-10-10,"Trousers,Sweater,Hoodie,Trousers,Hoodie,Blouse"
00009d946eec3ea54add5ba56d5210ea898def4b46c68570cf0096d962cacc75,2018-11-19,"Trousers,T-shirt,Jacket,Jacket,Sweater,Sweater..."
00009d946eec3ea54add5ba56d5210ea898def4b46c68570cf0096d962cacc75,2018-11-22,"Trousers,T-shirt,Sweater,Cardigan,Top,Trousers..."
00009d946eec3ea54add5ba56d5210ea898def4b46c68570cf0096d962cacc75,2018-11-26,"Sweater,Top"
00009d946eec3ea54add5ba56d5210ea898def4b46c68570cf0096d962cacc75,2018-12-09,"Trousers,Trousers"
...,...,...
ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1778d0116cffd259264,2020-01-22,Trousers
ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1778d0116cffd259264,2020-07-03,"Polo shirt,Polo shirt,Polo shirt"
ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1778d0116cffd259264,2020-07-16,"Trousers,T-shirt,Sweater"
ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1778d0116cffd259264,2020-09-08,"Sweater,Shirt,Dress"


In [13]:
# Move the (customer_id, t_dat) index levels into columns and drop the date,
# leaving one row per basket keyed only by customer_id.
# NOTE: this rebinds `test`, so the cell is not idempotent - re-running it on
# its own raises a KeyError because `t_dat` has already been dropped.
test = test.reset_index().drop(columns='t_dat')
test

Unnamed: 0,customer_id,0
0,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,"Trousers,Sweater,Hoodie,Trousers,Hoodie,Blouse"
1,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,"Trousers,T-shirt,Jacket,Jacket,Sweater,Sweater..."
2,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,"Trousers,T-shirt,Sweater,Cardigan,Top,Trousers..."
3,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,"Sweater,Top"
4,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,"Trousers,Trousers"
...,...,...
3382300,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,Trousers
3382301,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,"Polo shirt,Polo shirt,Polo shirt"
3382302,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,"Trousers,T-shirt,Sweater"
3382303,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,"Sweater,Shirt,Dress"


In [14]:
# Attach segment and age to every Top Customers basket.
# The join key is spelled out so the merge cannot silently pick up additional
# shared column names; customer_id is the only column the two frames share.
df_item_sets = (
    customers
    .drop(columns='Unnamed: 0')
    .merge(test.rename(columns={0: 'items'}), on='customer_id')
)
df_item_sets

Unnamed: 0,customer_id,Customer_segment,age,items
0,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,Top Customers,56,"Trousers,Sweater,Hoodie,Trousers,Hoodie,Blouse"
1,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,Top Customers,56,"Trousers,T-shirt,Jacket,Jacket,Sweater,Sweater..."
2,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,Top Customers,56,"Trousers,T-shirt,Sweater,Cardigan,Top,Trousers..."
3,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,Top Customers,56,"Sweater,Top"
4,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,Top Customers,56,"Trousers,Trousers"
...,...,...,...,...
3382300,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,Top Customers,21,Trousers
3382301,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,Top Customers,21,"Polo shirt,Polo shirt,Polo shirt"
3382302,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,Top Customers,21,"Trousers,T-shirt,Sweater"
3382303,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,Top Customers,21,"Sweater,Shirt,Dress"


In [15]:
# Split the Top Customers baskets into age bands.
# Bands are half-open [low, high); the last band is every age >= 40 and so
# overlaps the 40-50 band.
age20_30 = df_item_sets.loc[df_item_sets.age.ge(20) & df_item_sets.age.lt(30)]
age30_40 = df_item_sets.loc[df_item_sets.age.ge(30) & df_item_sets.age.lt(40)]
age40_50 = df_item_sets.loc[df_item_sets.age.ge(40) & df_item_sets.age.lt(50)]
age40_above = df_item_sets.loc[df_item_sets.age.ge(40)]

# Comma-joined basket strings per band.
items_20_30 = age20_30['items'].tolist()
items_30_40 = age30_40['items'].tolist()
items_40_50 = age40_50['items'].tolist()
items_40_above = age40_above['items'].tolist()

# One list of product-type names per basket (list of lists), the shape that
# TransactionEncoder is fitted on in the next cell.
purchaseslist20_30 = [basket.split(",") for basket in items_20_30]
purchaseslist30_40 = [basket.split(",") for basket in items_30_40]
purchaseslist40_50 = [basket.split(",") for basket in items_40_50]
purchaseslist40above = [basket.split(",") for basket in items_40_above]

In [16]:
# One-hot encode the Top Customers baskets, one boolean frame per age band.
# The encoder is re-fitted for every band, so each frame only has columns for
# product types that occur in that band.
# (To get a sparse matrix instead of a dense array, `sparse=True` is an
# argument of transform / fit_transform, not of fit.)
te = TransactionEncoder()

te_ary = te.fit_transform(purchaseslist20_30)
df20_30 = pd.DataFrame(data=te_ary, columns=te.columns_)

te_ary = te.fit_transform(purchaseslist30_40)
df30_40 = pd.DataFrame(data=te_ary, columns=te.columns_)

te_ary = te.fit_transform(purchaseslist40_50)
df40_50 = pd.DataFrame(data=te_ary, columns=te.columns_)

te_ary = te.fit_transform(purchaseslist40above)
df40above = pd.DataFrame(data=te_ary, columns=te.columns_)

In [17]:
def freqitm(df, minsup):
    """Mine frequent itemsets from a one-hot basket frame with FP-Growth.

    ``minsup`` is passed to ``fpgrowth`` as ``min_support``. Itemsets are
    reported by column name and returned sorted by support, highest first.
    """
    itemsets = fpgrowth(df, min_support=minsup, use_colnames=True)
    return itemsets.sort_values(by='support', ascending=False)

In [18]:
def apo_rules(frequent_itemsets):
    """Derive association rules from a frequent-itemset table.

    Rules are generated with ``metric="lift"`` and ``min_threshold=1``, then
    returned sorted by confidence and, within equal confidence, by lift (both
    descending).
    """
    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
    return rules.sort_values(by=['confidence', 'lift'], ascending=False)

# Top Customers

## Age group 20-30

In [19]:
# Minimum support passed to freqitm for every segment / age band below.
minsup = 0.025

In [20]:
# Top Customers, age >= 20 and < 30: frequent itemsets at the minsup threshold.
freq20_30=freqitm(df20_30,minsup)
freq20_30

Unnamed: 0,support,itemsets
1,0.238266,(Trousers)
3,0.204613,(Dress)
8,0.189315,(Sweater)
4,0.132724,(T-shirt)
7,0.126052,(Top)
10,0.111841,(Blouse)
12,0.098853,(Bra)
6,0.092834,(Vest top)
19,0.081389,(Skirt)
11,0.069583,(Underwear bottom)


In [21]:
# Association rules for the itemsets above, sorted by confidence then lift.
apo_rules(freq20_30)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
2,(Swimwear bottom),(Bikini top),0.067477,0.068388,0.05214,0.772714,11.299056,0.047526,4.098861
3,(Bikini top),(Swimwear bottom),0.068388,0.067477,0.05214,0.762426,11.299056,0.047526,3.925196
17,(Underwear bottom),(Bra),0.069583,0.098853,0.03045,0.437611,4.426904,0.023572,1.602356
22,(Skirt),(Dress),0.081389,0.204613,0.026628,0.327173,1.598987,0.009975,1.182157
8,(Blouse),(Trousers),0.111841,0.238266,0.03499,0.31285,1.313031,0.008342,1.108542
16,(Bra),(Underwear bottom),0.098853,0.069583,0.03045,0.308037,4.426904,0.023572,1.344606
5,(Top),(Trousers),0.126052,0.238266,0.038248,0.303432,1.273501,0.008214,1.093553
1,(Sweater),(Trousers),0.189315,0.238266,0.054104,0.285786,1.199441,0.008996,1.066535
7,(T-shirt),(Trousers),0.132724,0.238266,0.037052,0.279166,1.171656,0.005428,1.056739
12,(Blouse),(Dress),0.111841,0.204613,0.030611,0.2737,1.337651,0.007727,1.095123


## Age group 30-40

In [22]:
# Top Customers, age >= 30 and < 40: frequent itemsets at the minsup threshold.
freq30_40=freqitm(df30_40,minsup)
freq30_40

Unnamed: 0,support,itemsets
4,0.259345,(Trousers)
5,0.238473,(Dress)
1,0.194331,(Sweater)
17,0.14117,(T-shirt)
9,0.135108,(Top)
18,0.114688,(Blouse)
11,0.104492,(Vest top)
8,0.092431,(Bra)
10,0.085686,(Skirt)
2,0.070675,(Underwear bottom)


In [23]:
# Association rules for the itemsets above, sorted by confidence then lift.
apo_rules(freq30_40)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
7,(Bikini top),(Swimwear bottom),0.056208,0.056338,0.042433,0.754934,13.399968,0.039267,3.850639
6,(Swimwear bottom),(Bikini top),0.056338,0.056208,0.042433,0.753185,13.399968,0.039267,3.823885
25,(Underwear bottom),(Bra),0.070675,0.092431,0.029341,0.415156,4.491533,0.022809,1.551813
23,(Skirt),(Dress),0.085686,0.238473,0.029776,0.347503,1.457201,0.009342,1.167096
8,(Blouse),(Trousers),0.114688,0.259345,0.03849,0.335604,1.294047,0.008746,1.11478
3,(Top),(Trousers),0.135108,0.259345,0.044844,0.331913,1.279815,0.009805,1.108622
1,(Sweater),(Trousers),0.194331,0.259345,0.0619,0.318528,1.228202,0.011501,1.086846
24,(Bra),(Underwear bottom),0.092431,0.070675,0.029341,0.31744,4.491533,0.022809,1.361529
5,(T-shirt),(Trousers),0.14117,0.259345,0.044757,0.317045,1.222485,0.008146,1.084486
14,(Blouse),(Dress),0.114688,0.238473,0.035361,0.308324,1.292911,0.008011,1.100989


## Age group 40-50

In [24]:
# Top Customers, age >= 40 and < 50: frequent itemsets at the minsup threshold.
freq40_50=freqitm(df40_50,minsup)
freq40_50

Unnamed: 0,support,itemsets
0,0.275018,(Trousers)
5,0.220168,(Dress)
1,0.191182,(Sweater)
10,0.136155,(T-shirt)
2,0.121104,(Top)
6,0.11628,(Blouse)
13,0.084448,(Vest top)
3,0.073937,(Skirt)
14,0.072353,(Shorts)
15,0.070736,(Bra)


In [25]:
# Association rules for the itemsets above, sorted by confidence then lift.
apo_rules(freq40_50)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
9,(Bikini top),(Swimwear bottom),0.046049,0.047469,0.034943,0.75883,15.98582,0.032757,3.949631
8,(Swimwear bottom),(Bikini top),0.047469,0.046049,0.034943,0.736127,15.98582,0.032757,3.615195
6,(Blouse),(Trousers),0.11628,0.275018,0.036953,0.317793,1.155536,0.004974,1.062701
5,(Top),(Trousers),0.121104,0.275018,0.03808,0.314442,1.143352,0.004774,1.057507
3,(T-shirt),(Trousers),0.136155,0.275018,0.042634,0.313126,1.138565,0.005189,1.05548
1,(Sweater),(Trousers),0.191182,0.275018,0.059669,0.312104,1.134851,0.00709,1.053913
10,(Blouse),(Dress),0.11628,0.220168,0.030484,0.262158,1.190719,0.004883,1.05691
12,(Top),(Sweater),0.121104,0.191182,0.028986,0.239349,1.251944,0.005833,1.063324
14,(Top),(Dress),0.121104,0.220168,0.028632,0.236429,1.073856,0.001969,1.021296
19,(Top),(T-shirt),0.121104,0.136155,0.027166,0.224321,1.647542,0.010677,1.113663


## Age group 40 and above

In [26]:
# Top Customers, age >= 40 (includes the 40-50 band): frequent itemsets at the
# minsup threshold.
freq40_above=freqitm(df40above,minsup)
freq40_above

Unnamed: 0,support,itemsets
0,0.269368,(Trousers)
15,0.21746,(Dress)
1,0.191198,(Sweater)
2,0.125671,(Blouse)
4,0.120829,(T-shirt)
7,0.120284,(Top)
9,0.072745,(Vest top)
13,0.067293,(Skirt)
11,0.063684,(Shirt)
17,0.060341,(Bra)


In [27]:
# Association rules for the itemsets above, sorted by confidence then lift.
apo_rules(freq40_above)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
11,(Bikini top),(Swimwear bottom),0.039486,0.040651,0.029663,0.751235,18.480092,0.028058,3.85645
10,(Swimwear bottom),(Bikini top),0.040651,0.039486,0.029663,0.729696,18.480092,0.028058,3.553465
7,(Top),(Trousers),0.120284,0.269368,0.035432,0.294573,1.093571,0.003032,1.03573
5,(T-shirt),(Trousers),0.120829,0.269368,0.035542,0.294152,1.092009,0.002995,1.035113
2,(Blouse),(Trousers),0.125671,0.269368,0.036751,0.292435,1.085635,0.002899,1.032601
1,(Sweater),(Trousers),0.191198,0.269368,0.054848,0.286866,1.064961,0.003346,1.024537
8,(Blouse),(Dress),0.125671,0.21746,0.029796,0.237092,1.090279,0.002467,1.025733
12,(Top),(Sweater),0.120284,0.191198,0.027018,0.224622,1.174815,0.00402,1.043107
0,(Trousers),(Sweater),0.269368,0.191198,0.054848,0.203618,1.064961,0.003346,1.015596
13,(Sweater),(Top),0.191198,0.120284,0.027018,0.141312,1.174815,0.00402,1.024488


# High Value Customers

In [28]:
# One basket per (customer, purchase date) for the 'High value Customer'
# segment: all product types bought that day, joined with commas.
#
# The frame is built explicitly (grouped Series -> to_frame(name=0)) instead of
# relying on what groupby(as_index=False)[col].apply(...) returns, because that
# return shape is not the same in every pandas release. The merge cell below
# renames the column labelled 0 to 'items' and joins on customer_id.
test_2 = (
    new_df[new_df['Customer_segment'] == 'High value Customer']
    .groupby(by=['customer_id', 't_dat'])['product_type']
    .apply(lambda x: ','.join(x.astype(str)))
    .to_frame(name=0)
    .reset_index()          # (customer_id, t_dat) index levels -> columns
    .drop(columns='t_dat')  # the date only served to separate baskets
)
test_2

Unnamed: 0,customer_id,0
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,"Jacket,Hoodie,Jacket"
1,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,Jumpsuit/Playsuit
2,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,"Blazer,Blazer"
3,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,"Shirt,Skirt"
4,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,Trousers
...,...,...
2024520,ffffcd5046a6143d29a04fb8c424ce494a76e5cdf4fab5...,"Trousers,Top,Blouse"
2024521,ffffcd5046a6143d29a04fb8c424ce494a76e5cdf4fab5...,"Swimwear bottom,Bikini top,Swimwear bottom,Bik..."
2024522,ffffcd5046a6143d29a04fb8c424ce494a76e5cdf4fab5...,"Leggings/Tights,T-shirt,T-shirt"
2024523,ffffcd5046a6143d29a04fb8c424ce494a76e5cdf4fab5...,"Hair/alice band,Vest top,T-shirt,Hair/alice band"


In [29]:
# Attach segment and age to every High value Customer basket.
# The join key is spelled out so the merge cannot silently pick up additional
# shared column names; customer_id is the only column the two frames share.
df_item_sets_2 = (
    customers
    .drop(columns='Unnamed: 0')
    .merge(test_2.rename(columns={0: 'items'}), on='customer_id')
)
df_item_sets_2

Unnamed: 0,customer_id,Customer_segment,age,items
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,High value Customer,49,"Jacket,Hoodie,Jacket"
1,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,High value Customer,49,Jumpsuit/Playsuit
2,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,High value Customer,49,"Blazer,Blazer"
3,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,High value Customer,49,"Shirt,Skirt"
4,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,High value Customer,49,Trousers
...,...,...,...,...
2024520,ffffcd5046a6143d29a04fb8c424ce494a76e5cdf4fab5...,High value Customer,21,"Trousers,Top,Blouse"
2024521,ffffcd5046a6143d29a04fb8c424ce494a76e5cdf4fab5...,High value Customer,21,"Swimwear bottom,Bikini top,Swimwear bottom,Bik..."
2024522,ffffcd5046a6143d29a04fb8c424ce494a76e5cdf4fab5...,High value Customer,21,"Leggings/Tights,T-shirt,T-shirt"
2024523,ffffcd5046a6143d29a04fb8c424ce494a76e5cdf4fab5...,High value Customer,21,"Hair/alice band,Vest top,T-shirt,Hair/alice band"


In [30]:
# Split the High value Customer baskets into age bands.
# Bands are half-open [low, high); the last band is every age >= 40 and so
# overlaps the 40-50 band.
# These names are the same ones used for the Top Customers section, so the
# earlier values are replaced from here on.
age20_30 = df_item_sets_2.loc[df_item_sets_2.age.ge(20) & df_item_sets_2.age.lt(30)]
age30_40 = df_item_sets_2.loc[df_item_sets_2.age.ge(30) & df_item_sets_2.age.lt(40)]
age40_50 = df_item_sets_2.loc[df_item_sets_2.age.ge(40) & df_item_sets_2.age.lt(50)]
age40_above = df_item_sets_2.loc[df_item_sets_2.age.ge(40)]

# Comma-joined basket strings per band.
items_20_30 = age20_30['items'].tolist()
items_30_40 = age30_40['items'].tolist()
items_40_50 = age40_50['items'].tolist()
items_40_above = age40_above['items'].tolist()

# One list of product-type names per basket (list of lists), the shape that
# TransactionEncoder is fitted on in the next cell.
purchaseslist20_30 = [basket.split(",") for basket in items_20_30]
purchaseslist30_40 = [basket.split(",") for basket in items_30_40]
purchaseslist40_50 = [basket.split(",") for basket in items_40_50]
purchaseslist40above = [basket.split(",") for basket in items_40_above]

In [31]:
# One-hot encode the High value Customer baskets, one boolean frame per age
# band (suffix _2). The encoder is re-fitted for every band, so each frame only
# has columns for product types that occur in that band.
# (To get a sparse matrix instead of a dense array, `sparse=True` is an
# argument of transform / fit_transform, not of fit.)
te = TransactionEncoder()

te_ary = te.fit_transform(purchaseslist20_30)
df20_30_2 = pd.DataFrame(data=te_ary, columns=te.columns_)

te_ary = te.fit_transform(purchaseslist30_40)
df30_40_2 = pd.DataFrame(data=te_ary, columns=te.columns_)

te_ary = te.fit_transform(purchaseslist40_50)
df40_50_2 = pd.DataFrame(data=te_ary, columns=te.columns_)

te_ary = te.fit_transform(purchaseslist40above)
df40above_2 = pd.DataFrame(data=te_ary, columns=te.columns_)

## Age group 20-30

In [32]:
# High value Customers, age >= 20 and < 30: frequent itemsets at the minsup
# threshold. Rebinds freq20_30, replacing the Top Customers result.
freq20_30=freqitm(df20_30_2,minsup)
freq20_30

Unnamed: 0,support,itemsets
7,0.232167,(Trousers)
3,0.1852,(Sweater)
0,0.179891,(Dress)
11,0.143355,(T-shirt)
13,0.120899,(Top)
4,0.108264,(Bra)
17,0.103641,(Blouse)
12,0.099009,(Vest top)
5,0.079357,(Underwear bottom)
8,0.07744,(Bikini top)


In [33]:
# Association rules for the High value itemsets above, sorted by confidence
# then lift.
apo_rules(freq20_30)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Swimwear bottom),(Bikini top),0.075974,0.07744,0.060548,0.79695,10.291227,0.054664,4.543507
1,(Bikini top),(Swimwear bottom),0.07744,0.075974,0.060548,0.781867,10.291227,0.054664,4.236073
9,(Underwear bottom),(Bra),0.079357,0.108264,0.035225,0.443885,4.100017,0.026634,1.603509
8,(Bra),(Underwear bottom),0.108264,0.079357,0.035225,0.325366,4.100017,0.026634,1.364655
10,(Blouse),(Trousers),0.103641,0.232167,0.032932,0.317753,1.368639,0.00887,1.125447
7,(Top),(Trousers),0.120899,0.232167,0.036528,0.302138,1.301378,0.008459,1.100264
3,(Sweater),(Trousers),0.1852,0.232167,0.051951,0.280516,1.208247,0.008954,1.067198
5,(T-shirt),(Trousers),0.143355,0.232167,0.039505,0.275572,1.186952,0.006222,1.059915
20,(Blouse),(Dress),0.103641,0.179891,0.02675,0.258102,1.434764,0.008106,1.105419
12,(Top),(Sweater),0.120899,0.1852,0.031065,0.256948,1.387408,0.008674,1.096558


## Age group 30-40

In [34]:
# High value Customers, age >= 30 and < 40: frequent itemsets at the minsup
# threshold. Rebinds freq30_40, replacing the Top Customers result.
freq30_40=freqitm(df30_40_2,minsup)
freq30_40

Unnamed: 0,support,itemsets
3,0.270484,(Trousers)
4,0.215186,(Dress)
0,0.193536,(Sweater)
14,0.158917,(T-shirt)
7,0.138878,(Top)
8,0.117089,(Vest top)
9,0.111722,(Blouse)
1,0.107806,(Bra)
2,0.087312,(Underwear bottom)
11,0.079031,(Shorts)


In [35]:
# Association rules for the High value itemsets above, sorted by confidence
# then lift.
apo_rules(freq30_40)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
5,(Bikini top),(Swimwear bottom),0.065435,0.065757,0.051277,0.783641,11.91723,0.046975,4.318026
4,(Swimwear bottom),(Bikini top),0.065757,0.065435,0.051277,0.7798,11.91723,0.046975,4.244175
17,(Underwear bottom),(Bra),0.087312,0.107806,0.037529,0.429819,3.986963,0.028116,1.564756
8,(Blouse),(Trousers),0.111722,0.270484,0.040671,0.364034,1.345865,0.010452,1.1471
33,(Skirt),(Dress),0.078838,0.215186,0.028004,0.355208,1.650701,0.011039,1.217159
7,(Top),(Trousers),0.138878,0.270484,0.04903,0.353045,1.305235,0.011466,1.127615
16,(Bra),(Underwear bottom),0.107806,0.087312,0.037529,0.348112,3.986963,0.028116,1.400067
1,(Sweater),(Trousers),0.193536,0.270484,0.064612,0.333849,1.234266,0.012263,1.095121
3,(T-shirt),(Trousers),0.158917,0.270484,0.052313,0.329187,1.217031,0.009329,1.087511
24,(Blouse),(Dress),0.111722,0.215186,0.034314,0.30714,1.427319,0.010273,1.132715


## Age group 40-50

In [36]:
# High value Customers, age >= 40 and < 50: frequent itemsets at the minsup
# threshold. Rebinds freq40_50, replacing the Top Customers result.
freq40_50=freqitm(df40_50_2,minsup)
freq40_50

Unnamed: 0,support,itemsets
5,0.280391,(Trousers)
7,0.193215,(Sweater)
8,0.191627,(Dress)
9,0.154159,(T-shirt)
6,0.120264,(Top)
12,0.110325,(Blouse)
10,0.089865,(Vest top)
15,0.085626,(Shorts)
11,0.082743,(Bra)
16,0.071958,(Underwear bottom)


In [37]:
# Association rules for the High value itemsets above, sorted by confidence
# then lift.
apo_rules(freq40_50)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
5,(Bikini top),(Swimwear bottom),0.057766,0.059171,0.045616,0.789663,13.345473,0.042198,4.47296
4,(Swimwear bottom),(Bikini top),0.059171,0.057766,0.045616,0.770915,13.345473,0.042198,4.113035
19,(Underwear bottom),(Bra),0.071958,0.082743,0.027906,0.387807,4.686902,0.021952,1.498314
18,(Bra),(Underwear bottom),0.082743,0.071958,0.027906,0.337262,4.686902,0.021952,1.400315
9,(Blouse),(Trousers),0.110325,0.280391,0.03712,0.336461,1.199971,0.006186,1.084501
7,(Top),(Trousers),0.120264,0.280391,0.039218,0.326102,1.163028,0.005497,1.067831
1,(Sweater),(Trousers),0.193215,0.280391,0.06216,0.321714,1.147376,0.007984,1.060922
20,(Shorts),(T-shirt),0.085626,0.154159,0.027364,0.319572,2.073005,0.014164,1.243102
3,(T-shirt),(Trousers),0.154159,0.280391,0.047915,0.310814,1.108504,0.00469,1.044144
25,(Vest top),(T-shirt),0.089865,0.154159,0.025448,0.283184,1.836964,0.011595,1.179998


## Age group 40 and above

In [38]:
# High value Customers, age >= 40 (includes the 40-50 band): frequent itemsets
# at the minsup threshold. Rebinds freq40_above, replacing the Top Customers
# result.
freq40_above=freqitm(df40above_2,minsup)
freq40_above

Unnamed: 0,support,itemsets
5,0.275068,(Trousers)
7,0.195657,(Sweater)
8,0.18923,(Dress)
9,0.138248,(T-shirt)
6,0.120647,(Top)
13,0.119421,(Blouse)
10,0.079175,(Vest top)
12,0.070889,(Bra)
14,0.069117,(Shorts)
3,0.067254,(Shirt)


In [39]:
# Association rules for the High value itemsets above, sorted by confidence
# then lift.
apo_rules(freq40_above)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
5,(Bikini top),(Swimwear bottom),0.049407,0.050315,0.038441,0.778044,15.463441,0.035955,4.278714
4,(Swimwear bottom),(Bikini top),0.050315,0.049407,0.038441,0.764002,15.463441,0.035955,4.027968
6,(Blouse),(Trousers),0.119421,0.275068,0.037042,0.310182,1.127658,0.004193,1.050904
9,(Top),(Trousers),0.120647,0.275068,0.03694,0.306184,1.113122,0.003754,1.044848
3,(T-shirt),(Trousers),0.138248,0.275068,0.040936,0.296108,1.07649,0.002909,1.029891
1,(Sweater),(Trousers),0.195657,0.275068,0.057758,0.295198,1.073184,0.003939,1.028562
10,(Top),(Sweater),0.120647,0.195657,0.028169,0.233484,1.193332,0.004564,1.049349
14,(Blouse),(Dress),0.119421,0.18923,0.027323,0.228797,1.209092,0.004725,1.051305
17,(Top),(T-shirt),0.120647,0.138248,0.02668,0.221137,1.599565,0.01,1.106423
0,(Trousers),(Sweater),0.275068,0.195657,0.057758,0.209976,1.073184,0.003939,1.018125
