<a href="https://colab.research.google.com/github/viznuv/conjoint_analysis/blob/main/conjoint_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import statsmodels.formula.api as smf
import itertools

# --- Load -------------------------------------------------------------------
# Read the bundle ratings from disk (adjust the path if the file lives elsewhere).
df = pd.read_csv('/content/bbq_summer.csv')

# Peek at the first rows as a sanity check on what was loaded.
print(df.head())

# --- Prepare ----------------------------------------------------------------
# The meal-component columns; this single list drives the dtype cast, the
# enumeration of level combinations, and the column order of the prediction grid.
categorical_cols = ['starter', 'maindishI', 'maindishII', 'side', 'dessert']

# Cast every component column to pandas' categorical dtype in one call so the
# distinct levels can be read back from `.cat.categories` further down.
df = df.astype({col: 'category' for col in categorical_cols})

# --- Fit --------------------------------------------------------------------
# Ordinary least squares of avg_rating on the components; wrapping each term in
# C(...) tells statsmodels to treat it as categorical.
formula = 'avg_rating ~ C(starter) + C(maindishI) + C(maindishII) + C(side) + C(dessert)'
model = smf.ols(formula, data=df).fit()

# The coefficient table in the summary holds the part-worth utilities.
print(model.summary())

# --- Enumerate --------------------------------------------------------------
# Levels observed for each component, unpacked in the same order as
# `categorical_cols`.
starters, maindishI, maindishII, side, dessert = (
    df[col].cat.categories for col in categorical_cols
)

# Full factorial grid: one row per possible meal built from the observed levels.
all_combinations = list(
    itertools.product(starters, maindishI, maindishII, side, dessert)
)
df_combinations = pd.DataFrame(all_combinations, columns=categorical_cols)

# --- Predict ----------------------------------------------------------------
# Score every candidate meal with the fitted model.
predicted = model.predict(df_combinations)
df_combinations['predicted_rating'] = predicted

# --- Report -----------------------------------------------------------------
# Pick out the rows with the highest and the lowest predicted rating.
ratings = df_combinations['predicted_rating']
best_combo = df_combinations.loc[ratings.idxmax()]
worst_combo = df_combinations.loc[ratings.idxmin()]

for heading, combo in (
    ("\nBest Combination:", best_combo),
    ("\nWorst Combination:", worst_combo),
):
    print(heading)
    print(combo)


   bundleID                starter    maindishI maindishII  \
0         1  Fried Chicken Tenders  BBQ Brisket    Sausage   
1         2  Fried Chicken Tenders  BBQ Brisket    Sausage   
2         3  Fried Chicken Tenders  BBQ Brisket    Sausage   
3         4  Fried Chicken Tenders  BBQ Brisket    Sausage   
4         5  Fried Chicken Tenders  BBQ Brisket    Sausage   

                 side              dessert  avg_rating  
0      Mac and Cheese      Blondie Brownie        6.04  
1      Mac and Cheese  Apple Pie a la Mode        9.29  
2        Potato Salad      Blondie Brownie        6.20  
3        Potato Salad  Apple Pie a la Mode        8.84  
4  French Fry Platter      Blondie Brownie        8.24  
                            OLS Regression Results                            
Dep. Variable:             avg_rating   R-squared:                       0.269
Model:                            OLS   Adj. R-squared:                  0.246
Method:                 Least Squares   F-statis