In [1]:
# Lookup table: marketing action / collection name -> product category.
# The mapping is many-to-one (e.g. four collections map to
# 'Women Ready-to-Wear'). It is not referenced by the other cells shown here.
# NOTE(review): 'Woman Shoes' is spelled differently from the other
# 'Women ...' categories — confirm it matches the label used in the data.
action_collection_to_product_category = {
    'Mid-Season Jewelry and Timepieces': 'Jewelry',
    'Sailor Collection': 'Women Accessory',
    'Spring Collection': 'Women Bags',
    'Formal Retail Action': 'Women Ready-to-Wear',
    'Spring-Summer Collection': 'Women Small Leather Goods',
    'Fall Collection': 'Men Ready-to-Wear',
    'Art Collection': 'Men Accessory',
    'Winter Collection': 'Men Shoes',
    'Retail Action': 'Men Bags',
    'Timepieces Collection': 'Men Small Leather Goods',
    'Summer Collection': 'Woman Shoes',
    'Fall-Winter Collection': 'Watches',
    "Cote d'Azur Collection": 'Child',
    'Small Jewelry and Timepieces': 'Women Ready-to-Wear',
    'ABCDER Collection': 'Woman Shoes',
    'Lady Collection': 'Women Ready-to-Wear',
    'Glamorous Collection': 'Women Ready-to-Wear'
}

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the transaction records from the Excel export into a DataFrame.
transactions_path = '../OneDrive_1_2024-2-19/transactions.xlsx'
transactions_excel = pd.read_excel(transactions_path)

In [4]:
# Most frequent product category, used as the fill value for rows labelled
# 'Unknown'. The 'Unknown' placeholder is removed from the candidates first:
# if it were the most frequent label, filling 'Unknown' with it would silently
# change nothing.
category_counts = transactions_excel['product_category'].value_counts()
known_category_counts = category_counts.drop('Unknown', errors='ignore')

# With no known label at all there is nothing better to fill with, so fall
# back to the plain mode over every label.
if known_category_counts.empty:
    most_popular_product_category = category_counts.idxmax()
else:
    most_popular_product_category = known_category_counts.idxmax()
most_popular_product_category

'Women Accessory'

In [5]:
# Overwrite the 'Unknown' placeholder with the most popular category.
# This edits transactions_excel in place.
is_unknown_category = transactions_excel['product_category'] == 'Unknown'
transactions_excel.loc[is_unknown_category, 'product_category'] = most_popular_product_category

In [6]:
# Work on an independent copy holding only the columns needed to measure
# how much each client spends per product category.
interest_columns = ['client_id', 'product_category', 'gross_amount_euro']
client_product_interests = transactions_excel.loc[:, interest_columns].copy()

In [7]:
# Total gross spend for every (client, product category) pair.
# The result is indexed by those two keys.
client_category_groups = client_product_interests.groupby(['client_id', 'product_category'])
client_product_interests = client_category_groups.agg({'gross_amount_euro': 'sum'}).copy()

In [8]:
# Turn the group keys back into ordinary columns.
client_product_interests = client_product_interests.reset_index()

# Per-client total spend, repeated on each of that client's category rows.
client_totals = client_product_interests.groupby('client_id')['gross_amount_euro'].transform('sum')
client_product_interests['total_gross_amount_euro'] = client_totals

In [9]:
# Inspect the per-client, per-category spend next to each client's total spend.
client_product_interests

Unnamed: 0,client_id,product_category,gross_amount_euro,total_gross_amount_euro
0,c00029531,Women Accessory,2029,6681
1,c00029531,Women Small Leather Goods,4652,6681
2,c00055636,Men Bags,2278,2898
3,c00055636,Women Accessory,620,2898
4,c00068475,Men Ready-to-Wear,3515,3515
...,...,...,...,...
22973,c99976540,Women Bags,13195,14803
22974,c99978675,Women Ready-to-Wear,6368,6368
22975,c99989096,Women Accessory,984,984
22976,c99995560,Women Bags,3926,3926


In [10]:
# Fraction of each client's total spend that went to the row's category.
category_spend = client_product_interests['gross_amount_euro']
client_total_spend = client_product_interests['total_gross_amount_euro']
client_product_interests['monetary_ratio'] = category_spend / client_total_spend

In [11]:
# Inspect the spend share per client and category (display only; nothing is reassigned).
client_product_interests[['client_id', 'product_category', 'monetary_ratio']]

Unnamed: 0,client_id,product_category,monetary_ratio
0,c00029531,Women Accessory,0.303697
1,c00029531,Women Small Leather Goods,0.696303
2,c00055636,Men Bags,0.786059
3,c00055636,Women Accessory,0.213941
4,c00068475,Men Ready-to-Wear,1.000000
...,...,...,...
22973,c99976540,Women Bags,0.891373
22974,c99978675,Women Ready-to-Wear,1.000000
22975,c99989096,Women Accessory,1.000000
22976,c99995560,Women Bags,1.000000


In [12]:
# Overall gross spend per product category across all transactions.
category_groups = transactions_excel.groupby('product_category')
product_sub_category_spend = category_groups.agg({'gross_amount_euro': 'sum'}).copy()

# Grand total over the category sums; the denominator for category shares.
total_spent = product_sub_category_spend['gross_amount_euro'].sum()

In [13]:
# Share of the overall spend that falls into each product category.
category_totals = product_sub_category_spend['gross_amount_euro']
product_sub_category_spend['category_spend_ratio'] = category_totals / total_spent

In [14]:
# Move 'product_category' from the index (left there by the groupby) back to
# a regular column.
product_sub_category_spend = product_sub_category_spend.reset_index()

In [15]:
# Keep only the category label and its overall spend share; the raw sum is
# no longer needed.
ratio_columns = ['product_category', 'category_spend_ratio']
product_sub_category_spend = product_sub_category_spend[ratio_columns]

In [16]:
# Inspect the overall spend share of each product category.
product_sub_category_spend

Unnamed: 0,product_category,category_spend_ratio
0,Child,0.010545
1,HC,0.000235
2,House,0.007319
3,Jewelry,0.058502
4,Men Accessory,0.003716
5,Men Bags,0.01087
6,Men Ready-to-Wear,0.060248
7,Men Shoes,0.021521
8,Men Small Leather Goods,0.003648
9,Watches,0.003158


In [17]:
# Drop the intermediate amount columns, keeping the keys and the spend share.
feature_columns = ['client_id', 'product_category', 'monetary_ratio']
client_product_interests = client_product_interests[feature_columns]
client_product_interests

Unnamed: 0,client_id,product_category,monetary_ratio
0,c00029531,Women Accessory,0.303697
1,c00029531,Women Small Leather Goods,0.696303
2,c00055636,Men Bags,0.786059
3,c00055636,Women Accessory,0.213941
4,c00068475,Men Ready-to-Wear,1.000000
...,...,...,...
22973,c99976540,Women Bags,0.891373
22974,c99978675,Women Ready-to-Wear,1.000000
22975,c99989096,Women Accessory,1.000000
22976,c99995560,Women Bags,1.000000


In [18]:
# Attach each category's overall spend share to the client rows. A left join
# keeps every client/category row.
client_product_interests = client_product_interests.merge(
    product_sub_category_spend,
    how='left',
    on='product_category',
)

In [19]:
# Inspect the merged table: client spend share alongside overall category share.
client_product_interests

Unnamed: 0,client_id,product_category,monetary_ratio,category_spend_ratio
0,c00029531,Women Accessory,0.303697,0.105593
1,c00029531,Women Small Leather Goods,0.696303,0.044956
2,c00055636,Men Bags,0.786059,0.010870
3,c00055636,Women Accessory,0.213941,0.105593
4,c00068475,Men Ready-to-Wear,1.000000,0.060248
...,...,...,...,...
22973,c99976540,Women Bags,0.891373,0.293031
22974,c99978675,Women Ready-to-Wear,1.000000,0.299146
22975,c99989096,Women Accessory,1.000000,0.105593
22976,c99995560,Women Bags,1.000000,0.293031


In [20]:
# Interest score: the client's spend share in a category, weighted by the log
# of the inverse overall category share. Categories with a small share of
# overall spend therefore get a larger weight.
inverse_category_share = 1 / client_product_interests['category_spend_ratio']
client_product_interests['sfisf'] = client_product_interests['monetary_ratio'] * np.log(inverse_category_share)

In [21]:
# Rescale the score to the [0, 1] interval with min-max scaling.
# No z-score step is applied beforehand: min-max scaling is invariant under
# the shift-and-positive-scale that standardisation performs, so it would
# yield the same values while adding an extra division by the standard
# deviation.
sfisf_min = client_product_interests['sfisf'].min()
sfisf_max = client_product_interests['sfisf'].max()
client_product_interests['sfisf'] = (client_product_interests['sfisf'] - sfisf_min) / (sfisf_max - sfisf_min)

In [22]:
# Inspect the table with the rescaled sfisf score.
client_product_interests

Unnamed: 0,client_id,product_category,monetary_ratio,category_spend_ratio,sfisf
0,c00029531,Women Accessory,0.303697,0.105593,0.081696
1,c00029531,Women Small Leather Goods,0.696303,0.044956,0.258453
2,c00055636,Men Bags,0.786059,0.010870,0.425303
3,c00055636,Women Accessory,0.213941,0.105593,0.057551
4,c00068475,Men Ready-to-Wear,1.000000,0.060248,0.336145
...,...,...,...,...,...
22973,c99976540,Women Bags,0.891373,0.293031,0.130920
22974,c99978675,Women Ready-to-Wear,1.000000,0.299146,0.144403
22975,c99989096,Women Accessory,1.000000,0.105593,0.269005
22976,c99995560,Women Bags,1.000000,0.293031,0.146874


In [23]:
# Persist the final feature (one score per client and category) as CSV,
# without the row index.
export_columns = ['client_id', 'product_category', 'sfisf']
output_path = '../features/client_interests.csv'
client_product_interests[export_columns].to_csv(output_path, index=False)