# Markov Chain Attribution Model

## Model 1

Python Implementation of Markov Chain Attribution Model by Akanksha Anand (Ak)
https://medium.com/@akanksha.etc302/python-implementation-of-markov-chain-attribution-model-0924687e4037

### Imports

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

### Data Processing

In [None]:
# Load the raw touchpoint log and order it by cookie (descending) and, within
# each cookie, by ascending `time`.
df = (
    pd.read_csv(r'attribution data.csv')
    .sort_values(by=['cookie', 'time'], ascending=[False, True])
)
# 1-based position of each row inside its cookie's group, in the sorted order.
df['visit_order'] = df.groupby('cookie').cumcount().add(1)

In [None]:
# Last row of every cookie, reduced to the cookie id and its conversion flag.
df_last_interaction = (
    df.drop_duplicates(subset='cookie', keep='last')
    .loc[:, ['cookie', 'conversion']]
)
# Both frames carry a `conversion` column, so the merge keeps the per-row value
# as `conversion_x` and the cookie's last-row value as `conversion_y`; no
# rename is applied here.
df_paths = df.merge(df_last_interaction, on='cookie', how='left')

In [None]:
# Wrap each row's channel between 'Start' and a terminal state picked from the
# cookie's last-row conversion flag (`conversion_y`).
# NOTE(review): paths are built per row, not per cookie, so (as long as channel
# names contain no ', ') every path is [Start, <channel>, <terminal>] — confirm
# that this is intended.
no_conversion = df_paths['conversion_y'] == 0
df_paths['path'] = np.where(
    no_conversion,
    'Start, ' + df_paths['channel'] + ', No_Conv',
    'Start, ' + df_paths['channel'] + ', Conversion',
)

# Turn the ', '-separated string into a list of states and keep only the
# columns the Markov chain cells need.
df_paths['path'] = df_paths['path'].str.split(', ')
df_paths = df_paths.loc[:, ['cookie', 'path']]
df_paths

In [None]:
# Variant of the path-building cell above. It used to read
# `df_paths['conversion']` and `df_paths['channel']` unconditionally, which
# raises KeyError once the previous cell has reduced `df_paths` to
# ['cookie', 'path'] (and the merge only produces `conversion_x` /
# `conversion_y`, never a plain `conversion`). The rebuild now runs only while
# the raw columns are still present, using whichever conversion column exists.
if 'channel' in df_paths.columns:
    conversion_col = 'conversion' if 'conversion' in df_paths.columns else 'conversion_y'
    df_paths['path'] = np.where(
        df_paths[conversion_col] == 0,
        ['Start, '] + df_paths['channel'] + [', No_Conv'],
        ['Start, '] + df_paths['channel'] + [', Conversion'])

    # Turn the ', '-separated string into a list of states.
    df_paths['path'] = df_paths['path'].str.split(', ')
    df_paths = df_paths[['cookie', 'path']]
df_paths

### Markov Chain

In [None]:
# Conversion Rate
list_of_paths = df_paths['path']

# Count every 'Conversion' entry across all paths.
total_conversions = 0
for steps in list_of_paths.tolist():
    total_conversions += steps.count('Conversion')

# Share of 'Conversion' entries relative to the number of paths.
base_conversion_rate = total_conversions / len(list_of_paths)

In [None]:
# Transition State
# Every distinct state that appears in any path (including 'Start' and the
# terminal states).
list_of_unique_channels = {step for steps in list_of_paths for step in steps}

# One zero-initialised counter per ordered pair of states, keyed
# '<origin>><destination>'.
transition_states = dict.fromkeys(
    (origin + '>' + destination
     for origin in list_of_unique_channels
     for destination in list_of_unique_channels),
    0,
)

In [None]:
# Count every observed origin -> destination step in a single pass over the
# paths. Terminal states never act as an origin.
# The previous version looked states up with `possible_state in s`, which is a
# substring test on strings: a channel whose name is contained in another
# channel's name was counted twice (or indexed past the end of the path).
# Comparing whole path entries pair by pair avoids that and removes the
# per-state rescan of all paths.
terminal_states = ('Conversion', 'No_Conv')
for user_path in list_of_paths:
    for origin, destination in zip(user_path, user_path[1:]):
        if origin not in terminal_states:
            transition_states[origin + '>' + destination] += 1
transition_states

In [None]:
# Unfinished draft of the "Transition Probabilities" cell that follows: it
# resets `transition_prob` and collects the positions of each state's outgoing
# keys, but never uses them. The next cell repeats these lines and completes
# the calculation.
transition_prob = defaultdict(dict)
for state in list_of_unique_channels:
    if state in ['Conversion', 'No_Conv']:
        continue
    counter = 0
    index = [pos for pos, key in enumerate(transition_states) if state + '>' in key]

In [None]:
# Transition Probabilities
# For every non-terminal state, turn the outgoing transition counts into
# probabilities by dividing each count by the state's total outgoing count.
# Only transitions that were actually observed (count > 0) get an entry.
transition_prob = defaultdict(dict)
for state in list_of_unique_channels:
    if state in ('Conversion', 'No_Conv'):
        continue
    # Match on the key prefix: a plain substring test (`state + '>' in key`)
    # would also pick up keys of any other channel whose name ends with `state`.
    prefix = state + '>'
    outgoing = {
        key: count
        for key, count in transition_states.items()
        if key.startswith(prefix) and count > 0
    }
    # Was `trans_dict[...]`, a name that is never defined (NameError).
    counter = sum(outgoing.values())
    for key, count in outgoing.items():
        transition_prob[key] = float(count) / float(counter)

In [None]:
# Transition Matrix
list_of_unique_channels = set(x for element in list_of_paths for x in element)

# Square matrix of zeros with one row and one column per state. Building it in
# one step replaces the row-by-row growth and the chained assignment
# `transition_matrix.loc[channel][channel] = ...`, which is not guaranteed to
# write into the frame (and never does under pandas Copy-on-Write).
states = list(list_of_unique_channels)
transition_matrix = pd.DataFrame(0.0, index=states, columns=states)

# Terminal states are absorbing: they transition to themselves with
# probability 1.
for channel in ('Conversion', 'No_Conv'):
    if channel in transition_matrix.index:
        transition_matrix.at[channel, channel] = 1.0

# Assign probability using calculated transition probability
for key, value in transition_prob.items():
    origin, destination = key.split('>')
    transition_matrix.at[origin, destination] = value

In [None]:
# Removal Effect
# For each channel: delete it from the transition matrix, send the probability
# mass that used to flow into it to 'No_Conv', recompute the probability of
# reaching 'Conversion' from 'Start', and record the relative drop against the
# base conversion rate.
removal_effects_dict = {}
channels = [channel for channel in transition_matrix.columns if channel not in ['Start', 'No_Conv', 'Conversion']]
for channel in channels:
    removal_df = transition_matrix.drop(channel, axis=1).drop(channel, axis=0)

    # Whatever each row lost with the removed column is moved to 'No_Conv' so
    # the row sums to 1 again. Single-step `.loc[row, col]` indexing is used
    # because the previous chained `.loc[row][col] = ...` form is not
    # guaranteed to modify `removal_df`. The 'No_Conv' column does not enter
    # the conversion probability computed below.
    for column in removal_df.columns:
        row_sum = np.sum(list(removal_df.loc[column]))
        null_pct = float(1) - row_sum
        if null_pct != 0:
            removal_df.loc[column, 'No_Conv'] += null_pct
    # 'No_Conv' stays absorbing (loop-invariant, so set once after the loop).
    removal_df.loc['No_Conv', 'No_Conv'] = 1.0

    # R: transitions from non-terminal states into the terminal states.
    removal_to_conv = removal_df[
        ['No_Conv', 'Conversion']].drop(['No_Conv', 'Conversion'], axis=0)
    # Q: transitions among the non-terminal states.
    removal_to_non_conv = removal_df.drop(
        ['No_Conv', 'Conversion'], axis=1).drop(['No_Conv', 'Conversion'], axis=0)

    # (I - Q)^-1 R gives, per starting state, the probability of ending in
    # each terminal state.
    removal_inv_diff = np.linalg.inv(
        np.identity(
            len(removal_to_non_conv.columns)) - np.asarray(removal_to_non_conv))
    removal_dot_prod = np.dot(removal_inv_diff, np.asarray(removal_to_conv))

    # Select the 'Conversion' column by label instead of by position 1.
    removal_cvr = pd.DataFrame(removal_dot_prod,
                               index=removal_to_conv.index,
                               columns=removal_to_conv.columns).loc['Start', 'Conversion']

    # Was `conversion_rate`, a name that is never defined; the rate computed in
    # the "Conversion Rate" cell is `base_conversion_rate`.
    removal_effect = 1 - removal_cvr / base_conversion_rate
    removal_effects_dict[channel] = removal_effect

In [None]:
# Attribution
# Split the total number of conversions across channels in proportion to each
# channel's share of the summed removal effects.
removal_effect_sum = np.sum(list(removal_effects_dict.values()))
attribution = {}
for key, value in removal_effects_dict.items():
    attribution[key] = (value / removal_effect_sum) * total_conversions

## Model 2

### Markov Chain

In [None]:
class MarkovChainAttributionModel:
    """Credit each state of a path with the probability of the step it takes next."""

    def __init__(self, transition_matrix):
        """
        Store the transition matrix.

        Args:
        - transition_matrix: Square table indexed as ``matrix[origin, destination]``
                             (NumPy-style two-index lookup).
        """
        self.transition_matrix = transition_matrix

    def calculate_attribution(self, path):
        """
        Sum, per origin state, the transition probabilities of the steps in ``path``.

        Args:
        - path: Sequence of integer state indices.

        Returns:
        - numpy.ndarray: One value per row of the transition matrix; the last
                         state of the path receives no credit for itself.
        """
        state_count = len(self.transition_matrix)
        attribution = np.zeros(state_count)
        # Walk the path as consecutive (origin, destination) pairs.
        for step in range(1, len(path)):
            origin, destination = path[step - 1], path[step]
            attribution[origin] += self.transition_matrix[origin, destination]
        return attribution

def main():
    """Run the model on a hard-coded 3-state example and print the attribution."""
    probabilities = np.array([
        [0.7, 0.2, 0.1],
        [0.3, 0.5, 0.2],
        [0.1, 0.2, 0.7],
    ])
    journey = [0, 1, 2]

    result = MarkovChainAttributionModel(probabilities).calculate_attribution(journey)
    print(result)


if __name__ == "__main__":
    main()

## Model 3

### Markov Chain

In [None]:
class MarkovChainAttributionModel:
    """Simulate channel paths from a transition matrix and score the channels in a path."""

    def __init__(self, transition_matrix, conversion_vector, removal_effects=None):
        """
        Initialize the Markov Chain Attribution Model.

        Args:
        - transition_matrix (numpy.ndarray): Transition matrix representing probabilities of transitioning
                                             from one channel to another.
        - conversion_vector (numpy.ndarray): Vector representing the probability of conversion for each channel.
                                             Stored on the instance; not read by the methods of this class.
        - removal_effects (numpy.ndarray): Vector representing the removal effects for each channel.
                                           Defaults to all zeros.
        """
        self.transition_matrix = transition_matrix
        self.conversion_vector = conversion_vector
        self.num_channels = len(transition_matrix)
        self.removal_effects = removal_effects if removal_effects is not None else np.zeros(self.num_channels)

    def simulate_conversion_path(self, path_length):
        """
        Simulate a conversion path based on the transition matrix.

        Args:
        - path_length (int): Length of the conversion path.

        Returns:
        - list: List representing the simulated conversion path. It always starts
                at state 0, so it holds at least one element even when
                ``path_length`` is less than 1.
        """
        current_state = 0  # Start from the initial state
        conversion_path = [current_state]
        for _ in range(path_length - 1):
            # Draw the next state from the current state's row of probabilities.
            current_state = np.random.choice(self.num_channels, p=self.transition_matrix[current_state])
            conversion_path.append(current_state)
        return conversion_path

    def calculate_attribution(self, path):
        """
        Calculate attribution for each channel in the given conversion path.

        Every visit to a channel adds 1 to its score; every step away from a
        channel subtracts that channel's removal effect. The scores are then
        divided by the path length.

        Args:
        - path (list): List representing the conversion path.

        Returns:
        - numpy.ndarray: Array representing the attribution for each channel.
                         All zeros for an empty path.
        """
        attribution = np.zeros(self.num_channels)
        # An empty path used to divide by zero and return NaNs; nothing was
        # visited, so every channel simply gets zero.
        if len(path) == 0:
            return attribution
        for position, channel in enumerate(path):
            if position > 0:
                prev_channel = path[position - 1]
                attribution[prev_channel] -= self.removal_effects[prev_channel]
            attribution[channel] += 1
        return attribution / len(path)  # Normalize attribution

In [None]:
# Example usage:
# Three-state demo inputs for the model defined above.
transition_matrix = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.6, 0.3],
    [0.3, 0.3, 0.4],
])
conversion_vector = np.array([0.1, 0.05, 0.07])
removal_effects = np.array([0.02, 0.01, 0.03])

model = MarkovChainAttributionModel(
    transition_matrix,
    conversion_vector,
    removal_effects=removal_effects,
)

# Draw a random 5-step path (no seed is set, so it differs between runs)
conversion_path = model.simulate_conversion_path(5)
print("Simulated Conversion Path:", conversion_path)

# Score the channels along that path
attribution = model.calculate_attribution(conversion_path)
print("Attribution:", attribution)