# **5-2 - Conjoint Analysis**

Marketing and Customer Analytics
---

In [5]:
import pandas as pd
import numpy as np

import pandas as pd
import numpy as np

**Outline**

1. Business Understanding
2. Define Path & Constant
3. Generate Variant
4. Wrangling
5. Generate Conjoint Data

# **1. Business Understanding**
---

- Company ABC is SaaS platform for online stores, e-commerce, and retail point-of-sale systems
- The ABC platform offers online retailers a suite of services including payments, marketing, shipping and customer engagement tools


## **1.2 Business Objective**
---
- Company ABC wants to know which attribute of the platform is the most prefered for customers
- Company uses the information to determine which marketing messages is the most suitable to be advertized and at what price

## **1.2 Business Question**
---
- Which platform features do customers like the most?
- What marketing messages should be used/focused on?
- What price should the company set to determine the monthly subscription price plan?

## **1.3 Modelling Task**
---

- Output target: customer choice
- The goal of this project is model customer choice (Choice Based Conjoint Method / CBC)
- Modelling task: Classification
- We need interpretable model, so model used: Logistic Regression
- Our target response is imbalance, so we consider use f1-score as evaluation metric

# **2. Define Path & Constant**
---

In [6]:
INPUT_PATH = ""
OUTPUT_PATH = ""

OPTIONS = "ABC"

In [7]:
# Read question combination
filename = "df_pairs.csv"

data = pd.read_csv(INPUT_PATH + filename, index_col=0)
print('Shape data:', data.shape)
data.head(10)

Shape data: (60, 2)


Unnamed: 0,Attribute,Price
0,Shipping and Fulfillment,$40
1,Shipping and Fulfillment,$20
2,Analytics and Reporting,$35
3,SEO and Marketing Features,$45
4,Bulk Product Upload,$45
5,Website Customization,$55
6,Customer Support,$35
7,Automated Email Marketing,$40
8,Customer Support,$40
9,Integration with Third-Party Apps,$25


# **3. Generate Variant**
---

In [8]:
def generateVariant(combi_data, n_question):
    """
    Function to generate variant
    Parameters
    ----------

    combi_data: pandas Dataframe
        Conjoint Attribute dataframe

    n_question: int
        number of question in conjoint survey

    Returns
    -------
    varian: dict
        Question variant
    """
    # Extract combi data
    total_combi, n_attribute = combi_data.shape
    n_combi = int(total_combi/n_question)

    # separate combi_data to chunk per question
    combi_chunks = [combi_data[i:i+n_combi] for i in range(0, total_combi, n_combi)]

    # Extract variant
    options = "ABCD"
    varian = {}

    for no, chunk in enumerate(combi_chunks):
        # reset index chunk
        chunk = chunk.reset_index(drop = True)

        # extract every combination in each question
        question_dict = {}
        for i in range(n_combi):
            question_dict[options[i]] = chunk.loc[i].tolist()

        question_dict["D"] = ["" for i in range(n_attribute)]

        # add question_dict to varian
        varian[no+1] = question_dict

    return varian

In [9]:
VARIAN = generateVariant(combi_data = data,
                         n_question = 20)

VARIAN

{1: {'A': ['Shipping and Fulfillment', '$40'],
  'B': ['Shipping and Fulfillment', '$20'],
  'C': ['Analytics and Reporting', '$35'],
  'D': ['', '']},
 2: {'A': ['SEO and Marketing Features', '$45'],
  'B': ['Bulk Product Upload', '$45'],
  'C': ['Website Customization', '$55'],
  'D': ['', '']},
 3: {'A': ['Customer Support', '$35'],
  'B': ['Automated Email Marketing', '$40'],
  'C': ['Customer Support', '$40'],
  'D': ['', '']},
 4: {'A': ['Integration with Third-Party Apps', '$25'],
  'B': ['Website Customization', '$50'],
  'C': ['Analytics and Reporting', '$30'],
  'D': ['', '']},
 5: {'A': ['Customer Support', '$55'],
  'B': ['Analytics and Reporting', '$25'],
  'C': ['Bulk Product Upload', '$30'],
  'D': ['', '']},
 6: {'A': ['Website Customization', '$45'],
  'B': ['Bulk Product Upload', '$20'],
  'C': ['Automated Email Marketing', '$45'],
  'D': ['', '']},
 7: {'A': ['Customer Support', '$50'],
  'B': ['Customer Support', '$25'],
  'C': ['Bulk Product Upload', '$25'],
  'D':

# **4. Wrangling Questionnaire**
---

In [12]:
def drop_column(data, column_to_drop):
    """
    Function to drop columns

    Parameters
    ----------

    data: pandas dataframe
        sampel data

    column_to_drop: list
        Columns name to drop

    Return
    ------

    final_data: pandas dataframe
        Final sampel data
    """
    # Copy data
    final_data = data.copy()

    # Drop data
    final_data.drop(columns = column_to_drop,
                    inplace = True)

    return final_data

def drop_index(data, index_to_drop):
    """
    Function to drop index

    Parameters
    ----------

    data: pandas dataframe
        sampel data

    index_to_drop: list
        index list to drop

    Returns
    -------

    final_data: pandas dataframe
        final sampel data
    """
    # Copy data
    final_data = data.copy()

    # Drop data
    final_data.drop(index_to_drop,
                    inplace = True)

    return final_data

def read_data(filename, index_to_drop, column_to_drop):
    """
    Function to read the data

    Parameters
    ----------

    filename: str
        path data

    index_to_drop: list
        List index to drop

    column_to_drop: list
        List column to drop

    Returns
    -------

    data: pandas dataframe
        final sampel data
    """
    # read raw data
    data = pd.read_csv(INPUT_PATH + filename)

    # drop kolom
    data = drop_column(data = data,
                       column_to_drop = column_to_drop)

    # drop index
    data = drop_index(data = data,
                      index_to_drop = [])

    # reset index
    data = data.reset_index(drop = True)

    return data

In [13]:
# Input
filename = "Survey.csv"
index_to_drop = []
column_to_drop = ['What is your name?', 'What is your phone number?']

raw_data = read_data(filename = filename,
                     index_to_drop = index_to_drop,
                     column_to_drop = column_to_drop)

print('Data shape:', raw_data.shape)
raw_data.head()

Data shape: (149, 22)


Unnamed: 0,how long has your business been running?,1. Which product will you buy?,2. Which product will you buy?,3. Which product will you buy?,4. Which product will you buy?,5. Which product will you buy?,6. Which product will you buy?,7. Which product will you buy?,8. Which product will you buy?,9. Which product will you buy?,...,12. Which product will you buy?,13. Which product will you buy?,14. Which product will you buy?,15. Which product will you buy?,16. Which product will you buy?,17. Which product will you buy?,18. Which product will you buy?,19. Which product will you buy?,20. Which product will you buy?,What is your email?
0,More than 1 year,B,A,"A, B, C","A, B","A, B, C","A, B",D. None (No choice),D. None (No choice),C,...,"A, B, C",B,"A, B",D. None (No choice),C,"A, B, C","A, C",C,C,debragray@example.com
1,Less than 1 year,C,D. None (No choice),A,D. None (No choice),A,A,D. None (No choice),D. None (No choice),"B, C",...,C,D. None (No choice),D. None (No choice),D. None (No choice),D. None (No choice),A,"B, C",C,D. None (No choice),mariemason@example.com
2,More than 1 year,D. None (No choice),C,A,A,B,A,D. None (No choice),C,C,...,B,C,D. None (No choice),A,D. None (No choice),D. None (No choice),C,C,D. None (No choice),lauraphillips@example.com
3,More than 1 year,"A, B","A, B","A, B","A, B","A, B, C","A, C","A, C","A, C","A, C",...,"A, B, C","A, B","A, B","A, C","A, C","B, C","A, C","A, B, C","A, B, C",lisaholland@example.com
4,More than 1 year,A,A,C,"A, C",C,C,C,"A, C",B,...,"A, B","A, C",A,"A, B, C","A, B, C",C,"B, C","A, B","A, B, C",brandongonzalez@example.com


In [14]:
# check
raw_data['how long has your business been running?'].value_counts()

Unnamed: 0_level_0,count
how long has your business been running?,Unnamed: 1_level_1
More than 1 year,97
Less than 1 year,52


# **5. Generate Conjoint Data**
---

In [15]:
def get_user_answer_raw(user_data):
    """
    Function to get user answer (dirty data)

    Parameters
    ----------

    user_data: pandas Series
        sampel user

    Returns
    -------

    user_answer_list: list
      user list answer
    """
    # Define
    user_answer_list = []
    n = len(user_data.index)

    for i in range(n):
        if i>0 and i<n-1:
            user_answer_list.append(user_data[i])

    return user_answer_list

def edit_answer(answer):
    """
    Function to edit the answer

    Parameters
    ----------

    answer: list
        raw answer

    Returns
    -------

    edited_answer: list
        edited answer
    """
    edited_answer = str(answer).replace(". None (No choice)", "")
    edited_answer = edited_answer.replace(" ", "")
    edited_answer = edited_answer.split(",")

    return edited_answer

def get_ohe_answer(answer):
    """
    Function to encode (OHE) the answer
    e.g.,:
    input: ["A"],       output: [1, 0, 0]
    input: ["A", "C"],  output: [1, 0, 1]
    input: ["D"],       output: [0, 0, 0]

    Parameters
    ----------

    answer: list
      answer list

    Returns
    -------

    ohe_answer: list
        OHE answer
    """
    ohe_answer = [1 if opt in answer else 0 for opt in OPTIONS]

    return ohe_answer

def convert_user_answer(user_answer_list):
    """
    Function to convert user answer from choice to feature

    Parameters
    ----------

    user_answer_list: list
        user answer choice

    Returns
    -------

    converted_user_answer_list: list
        choosen feature by user
    """
    converted_user_answer_list = []

    for idx, answer in enumerate(user_answer_list):
        # Edit answer
        answer = edit_answer(answer = answer)

        # Get ohe answer
        ohe_answer = get_ohe_answer(answer = answer)

        # Convert ohe answer
        for i in range(len(OPTIONS)):
            converted_answer = VARIAN[idx+1][OPTIONS[i]].copy()
            converted_answer.insert(0, ohe_answer[i])
            converted_user_answer_list.append(converted_answer)

    return converted_user_answer_list

def convert_answer_to_df(user_data, user_answer):
    """
    Function to convert user answer to dataframe

    Parameters
    ----------

    user_data: pandas Series
        sampel user

    user_answer: list
        User answer

    Returns
    -------

    user_answer_df: pandas Dataframe
        user answer
    """
    user_answer = np.array(user_answer)

    user_answer_df = pd.DataFrame({"Choice": user_answer[:, 0],
                                   "Attribute": user_answer[:, 1],
                                   "Price": user_answer[:, 2]})

    user_answer_df["Email"] = user_data["What is your email?"]
    user_answer_df["Business Year"] = user_data["how long has your business been running?"]

    user_answer_df = user_answer_df[["Email", "Business Year",
                                     "Choice", "Price","Attribute"]]

    return user_answer_df

def get_user_answer_clean(user_data):
    """
    Function to get clean user answer

    Parameters
    ----------

    user_data: pandas Series
        sampel user

    Returns
    -------

    clean_user_answer: pandas dataframe
        user answer
    """
    # get raw user answer
    raw_user_answer_list = get_user_answer_raw(user_data = user_data)

    # clean raw user answer
    clean_user_answer_list = convert_user_answer(user_answer_list = raw_user_answer_list)

    # convert answer to pandas dataframe
    clean_user_answer = convert_answer_to_df(user_data = user_data,
                                             user_answer = clean_user_answer_list)

    return clean_user_answer

def generate_conjoint_data(raw_data):
    """
    Function to generate conjoint data

    Parameters
    ----------

    raw_data: pandas Dataframe
        sampel data raw

    Returns
    -------

    conjoint_data: pandas Dataframe
        sampel data final
    """
    conjoint_data = pd.DataFrame()

    for user in raw_data.index:
        # Get user data
        user_data = raw_data.loc[user]

        # Get clean user answer
        clean_user_answer = get_user_answer_clean(user_data = user_data)

        # Concat data
        conjoint_data = pd.concat([conjoint_data, clean_user_answer])

    return conjoint_data

In [16]:
conjoint_data = generate_conjoint_data(raw_data = raw_data)
conjoint_data

  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])
  user_answer_list.append(user_data[i])


Unnamed: 0,Email,Business Year,Choice,Price,Attribute
0,debragray@example.com,More than 1 year,0,$40,Shipping and Fulfillment
1,debragray@example.com,More than 1 year,1,$20,Shipping and Fulfillment
2,debragray@example.com,More than 1 year,0,$35,Analytics and Reporting
3,debragray@example.com,More than 1 year,1,$45,SEO and Marketing Features
4,debragray@example.com,More than 1 year,0,$45,Bulk Product Upload
...,...,...,...,...,...
55,angelicagibson@example.com,More than 1 year,0,$30,SEO and Marketing Features
56,angelicagibson@example.com,More than 1 year,1,$55,Shipping and Fulfillment
57,angelicagibson@example.com,More than 1 year,0,$50,Bulk Product Upload
58,angelicagibson@example.com,More than 1 year,1,$50,SEO and Marketing Features


In [17]:
# Save conjoint data
conjoint_data.to_csv(OUTPUT_PATH+"full_respond.csv", index=False)
print("data saved")

data saved


In [18]:
more_than_year_respond = conjoint_data[conjoint_data["Business Year"]=="More than 1 year"]
more_than_year_respond

more_than_year_respond.to_csv(OUTPUT_PATH+"more_than_year_respond.csv", index=False)

In [19]:
less_than_year_respond = conjoint_data[conjoint_data["Business Year"]=="Less than 1 year"]
less_than_year_respond

less_than_year_respond.to_csv(OUTPUT_PATH+"less_than_year_respond.csv", index=False)

In [20]:
print(conjoint_data.shape, less_than_year_respond.shape, more_than_year_respond.shape)

(8940, 5) (3120, 5) (5820, 5)


In [21]:
for col in set(conjoint_data['Price']):
    print(col)
    print(conjoint_data[conjoint_data['Price']==col]['Choice'].value_counts(normalize=True))
    print("")

$20
Choice
0    0.696069
1    0.303931
Name: proportion, dtype: float64

$55
Choice
0    0.682047
1    0.317953
Name: proportion, dtype: float64

$45
Choice
0    0.587248
1    0.412752
Name: proportion, dtype: float64

$50
Choice
0    0.674976
1    0.325024
Name: proportion, dtype: float64

$35
Choice
0    0.651007
1    0.348993
Name: proportion, dtype: float64

$25
Choice
0    0.71057
1    0.28943
Name: proportion, dtype: float64

$30
Choice
0    0.621285
1    0.378715
Name: proportion, dtype: float64

$40
Choice
0    0.643456
1    0.356544
Name: proportion, dtype: float64

