In [1]:
# import library
from cryptography.fernet import Fernet
import pandas as pd
import numpy as np

In [2]:
# open the file key
with open('filekey.key', 'rb') as f:
  key = f.read()

In [3]:
# store it in fernet object variable
fernet = Fernet(key)

In [4]:
# open the dataset file
with open('retail_omnichannel_dataset.csv', 'rb') as f:
  retail_dataset = f.read()

In [5]:
#  Decrypt the data
decrypt_retail_data = fernet.decrypt(retail_dataset)

In [6]:
# Overwrite the file
with open('retail_omnichannel_dataset.csv', 'wb') as f:
  f.write(decrypt_retail_data)

In [7]:
# Load the data through pandas
retail = pd.read_csv('retail_omnichannel_dataset.csv')
retail.head()

Unnamed: 0,journey_id,customer_id,path,touch_dates,touch_count,converted,revenue
0,1,C1001,Social Media > Physical Store > Social Media Ads,2023-04-23 | 2023-04-24 | 2023-04-29,3,0,0
1,2,C1002,Physical Store > Social Media > Social Media >...,2024-02-13 | 2024-02-16 | 2024-02-19 | 2024-02...,5,1,9398
2,3,C1003,Website > Social Media Ads > Website > Affilia...,2024-07-07 | 2024-07-09 | 2024-07-13 | 2024-07...,5,0,0
3,4,C1004,Social Media > Social Media > Website > Email ...,2023-03-08 | 2023-03-10 | 2023-03-12 | 2023-03...,5,1,7096
4,5,C1005,Website > Physical Store > Affiliates > Email,2024-01-19 | 2024-01-20 | 2024-01-21 | 2024-01-22,4,1,1838


In [8]:
# Missing values
retail.isna().sum().rename('Missing Values')

Unnamed: 0,Missing Values
journey_id,0
customer_id,0
path,0
touch_dates,0
touch_count,0
converted,0
revenue,0


In [9]:
# Create a copy of the retail data
retail_copy = retail.copy(deep = True)
retail_copy.head()

Unnamed: 0,journey_id,customer_id,path,touch_dates,touch_count,converted,revenue
0,1,C1001,Social Media > Physical Store > Social Media Ads,2023-04-23 | 2023-04-24 | 2023-04-29,3,0,0
1,2,C1002,Physical Store > Social Media > Social Media >...,2024-02-13 | 2024-02-16 | 2024-02-19 | 2024-02...,5,1,9398
2,3,C1003,Website > Social Media Ads > Website > Affilia...,2024-07-07 | 2024-07-09 | 2024-07-13 | 2024-07...,5,0,0
3,4,C1004,Social Media > Social Media > Website > Email ...,2023-03-08 | 2023-03-10 | 2023-03-12 | 2023-03...,5,1,7096
4,5,C1005,Website > Physical Store > Affiliates > Email,2024-01-19 | 2024-01-20 | 2024-01-21 | 2024-01-22,4,1,1838


In [10]:
# creating the column as first touch
retail_copy['first_touch'] = retail_copy['path'].str.split('>').str[0]
retail_copy['first_touch'].head()

Unnamed: 0,first_touch
0,Social Media
1,Physical Store
2,Website
3,Social Media
4,Website


In [12]:
# Group these first touch with revenue to find out the successful ffirst touch
first_touch_group = retail_copy.groupby('first_touch')['revenue'].sum().sort_values(ascending = False).rename('First Touch Revenues')
round(first_touch_group,2)

Unnamed: 0_level_0,First Touch Revenues
first_touch,Unnamed: 1_level_1
Social Media Ads,342613
Social Media,304188
Physical Store,285730
Affiliates,275749
Website,208410
Email,191722


In [13]:
# create a new column as last_touch
retail_copy['last_touch'] = retail_copy['path'].str.split('>').str[-1]
retail_copy['last_touch'].head()

Unnamed: 0,last_touch
0,Social Media Ads
1,Email
2,Affiliates
3,Affiliates
4,Email


In [14]:
# group the revenue
last_touch_group = retail_copy.groupby('last_touch')['revenue'].sum().sort_values(ascending = False).rename('last Touch Revenue')
round(last_touch_group,2)

Unnamed: 0_level_0,last Touch Revenue
last_touch,Unnamed: 1_level_1
Affiliates,325231
Physical Store,281943
Email,280547
Website,266746
Social Media,236963
Social Media Ads,216982


In [15]:
# Linear attribbution -> split it wide
path_wide = retail_copy['path'].str.split('>', expand = True)
path_wide.head()

Unnamed: 0,0,1,2,3,4
0,Social Media,Physical Store,Social Media Ads,,
1,Physical Store,Social Media,Social Media,Physical Store,Email
2,Website,Social Media Ads,Website,Affiliates,Affiliates
3,Social Media,Social Media,Website,Email,Affiliates
4,Website,Physical Store,Affiliates,Email,


In [16]:
# count of customers at each touch points
touch_point_count = path_wide.notna().sum(axis = 1).rename('touch_point_count')
touch_point_count.head()

Unnamed: 0,touch_point_count
0,3
1,5
2,5
3,5
4,4


In [17]:
# convert wide into long format
long = path_wide.reset_index().melt(id_vars = 'index', var_name = 'position', value_name = 'channel').dropna(subset=['channel'])
long.head()

Unnamed: 0,index,position,channel
0,0,0,Social Media
1,1,0,Physical Store
2,2,0,Website
3,3,0,Social Media
4,4,0,Website


In [18]:
# add revenue and counts in the long table
concatenate = pd.concat([retail_copy['revenue'], touch_point_count], axis = 1)

In [19]:
# merge inside the long table
long = long.merge(concatenate, left_on= 'index', right_index= True)
long.head()

Unnamed: 0,index,position,channel,revenue,touch_point_count
0,0,0,Social Media,0,3
1,1,0,Physical Store,9398,5
2,2,0,Website,0,5
3,3,0,Social Media,7096,5
4,4,0,Website,1838,4


In [20]:
# Convert the position from 0 to 1
long['positions'] = long['position'].astype(int) + 1
long.head()

Unnamed: 0,index,position,channel,revenue,touch_point_count,positions
0,0,0,Social Media,0,3,1
1,1,0,Physical Store,9398,5,1
2,2,0,Website,0,5,1
3,3,0,Social Media,7096,5,1
4,4,0,Website,1838,4,1


In [21]:
long.dtypes

Unnamed: 0,0
index,int64
position,object
channel,object
revenue,int64
touch_point_count,int64
positions,int64


In [23]:
# linear share
long['linear_share'] = long['revenue']/long['touch_point_count']

In [25]:
# Clean up channel names
long['channel'] = (
    long['channel']
    .str.strip()       # remove leading/trailing spaces
    .str.lower()       # make all lowercase (optional)
    .str.title()       # capitalize nicely (optional)
)

In [26]:
# Linear Attribution
linear_attribution = long.groupby('channel')['linear_share'].sum().sort_values(ascending = False).rename('Linear Attribution Revenues')
round(linear_attribution,2)

Unnamed: 0_level_0,Linear Attribution Revenues
channel,Unnamed: 1_level_1
Social Media,293721.35
Physical Store,291351.37
Affiliates,289056.35
Email,248163.15
Social Media Ads,243335.27
Website,242784.52


In [27]:
# Time Decay Attribution we calculate the denominator
denom = touch_point_count * (touch_point_count +1)/2

In [28]:
# get the denominator inside the long table
long = long.merge(denom, left_on ='index', right_index = True)
long.head()

Unnamed: 0,index,position,channel,revenue,touch_point_count_x,positions,linear_share,touch_point_count_y
0,0,0,Social Media,0,3,1,0.0,6.0
1,1,0,Physical Store,9398,5,1,1879.6,15.0
2,2,0,Website,0,5,1,0.0,15.0
3,3,0,Social Media,7096,5,1,1419.2,15.0
4,4,0,Website,1838,4,1,459.5,10.0


In [36]:
long.rename(columns = {'touch_point_count_y' : 'denominator'}, inplace = True)

In [37]:
long.head()

Unnamed: 0,index,position,channel,revenue,touch_point_count_x,positions,linear_share,denominator
0,0,0,Social Media,0,3,1,0.0,6.0
1,1,0,Physical Store,9398,5,1,1879.6,15.0
2,2,0,Website,0,5,1,0.0,15.0
3,3,0,Social Media,7096,5,1,1419.2,15.0
4,4,0,Website,1838,4,1,459.5,10.0


In [38]:
# weights of the channels
long['weights'] = long['positions'] / long['denominator']
long['share'] = long['revenue'] * long['weights']

In [40]:
# Time Decay Attribution
time_decay = long.groupby('channel')['share'].sum().sort_values(ascending = False).rename('Time Decay Attribution Revenue')
round(time_decay,2)

Unnamed: 0_level_0,Time Decay Attribution Revenue
channel,Unnamed: 1_level_1
Affiliates,297361.4
Physical Store,290921.07
Social Media,280769.43
Email,262814.57
Website,252472.33
Social Media Ads,224073.2
