In [28]:
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency, chi2

# **CHI-SQUARE TEST**

**Association between Device Type and Customer Satisfaction**

**Background:**

Mizzare Corporation has collected data on customer satisfaction levels for two types of smart home devices: Smart Thermostats and Smart Lights. They want to determine if there's a significant association between the type of device purchased and the customer's satisfaction level.

**Data Provided:**

The data is summarized in a contingency table showing the counts of customers in each satisfaction level for both types of devices:


**Create the data**

In [9]:
# Observed customer counts. Every list is ordered by satisfaction level,
# from "Very Satisfied" down to "Very Unsatisfied".
data = {
    "Satisfaction": [
        "Very Satisfied",
        "Satisfied",
        "Neutral",
        "Unsatisfied",
        "Very Unsatisfied",
    ],
    "Smart Thermostat": [50, 80, 60, 30, 20],
    "Smart Light": [70, 100, 90, 50, 50],
}

**Create a DataFrame**

In [11]:
# One row per satisfaction level, one count column per device type.
df = pd.DataFrame(data=data)

In [12]:
# Display the frame to confirm the counts were entered as intended.
df

Unnamed: 0,Satisfaction,Smart Thermostat,Smart Light
0,Very Satisfied,50,70
1,Satisfied,80,100
2,Neutral,60,90
3,Unsatisfied,30,50
4,Very Unsatisfied,20,50


# **1. State the Hypotheses**

Null Hypothesis ($H_0$): There is no association between the type of smart home device purchased and customer satisfaction level (independence).

Alternative Hypothesis ($H_a$): There is an association between the type of smart home device purchased and customer satisfaction level (dependence).

**Create the contingency table**

# **2. Compute the Chi-Square Statistic**

In [16]:
# Index the counts by satisfaction level, then transpose so that each row is
# a device type and each column is a satisfaction level.
counts_by_satisfaction = df.set_index("Satisfaction")
contingency_table = counts_by_satisfaction.T

In [17]:
# Emit the heading and the table on separate lines with a single call.
print("Contingency Table", contingency_table, sep="\n")

Contingency Table
Satisfaction      Very Satisfied  Satisfied  Neutral  Unsatisfied  \
Smart Thermostat              50         80       60           30   
Smart Light                   70        100       90           50   

Satisfaction      Very Unsatisfied  
Smart Thermostat                20  
Smart Light                     50  


**Perform the Chi-Square test**

In [40]:
# Run the chi-square test of independence on the observed counts and unpack
# the statistic, p-value, degrees of freedom and expected frequencies.
chi2_stat, p, dof, expected = chi2_contingency(observed=contingency_table)

In [41]:
# Display the raw test outputs: statistic, p-value, degrees of freedom, expected counts.
chi2_stat, p, dof, expected

(5.638227513227513,
 0.22784371130697179,
 4,
 array([[ 48.,  72.,  60.,  32.,  28.],
        [ 72., 108.,  90.,  48.,  42.]]))

**Check the results**

In [42]:
# Report the test outputs; each argument is written on its own line.
print(
    "\nChi-Square Test Results:",
    f"Chi-Square Statistic: {chi2_stat}",
    f"P-value: {p}",
    f"Degrees of Freedom: {dof}",
    "\nExpected Frequencies:",
    expected,
    sep="\n",
)


Chi-Square Test Results:
Chi-Square Statistic: 5.638227513227513
P-value: 0.22784371130697179
Degrees of Freedom: 4

Expected Frequencies:
[[ 48.  72.  60.  32.  28.]
 [ 72. 108.  90.  48.  42.]]


# **3. Determine the Critical Value**

**Significance level**

In [30]:
# Significance level; used below to look up the chi-square critical value.
alpha = 0.05

**Degrees of freedom**

In [31]:
# Degrees of freedom for an r x c table: (r - 1) * (c - 1).
n_rows, n_cols = contingency_table.shape
dof = (n_rows - 1) * (n_cols - 1)

In [32]:
# Display the degrees of freedom computed from the table shape.
dof

4

**Critical value**

In [35]:
# Critical value: the (1 - alpha) quantile of the chi-square distribution
# with `dof` degrees of freedom.
critical_value = chi2.ppf(q=1 - alpha, df=dof)

In [37]:
# Report the rejection threshold together with the parameters that produced it.
print(
    f"Critical Value at alpha = {alpha} "
    f"with dof = {dof}: {critical_value}"
)

Critical Value at alpha = 0.05 with dof = 4: 9.487729036781154


# **4. Make a Decision**

**Decision**

In [43]:
# Decision rule: reject the null hypothesis only when the test statistic
# exceeds the critical value.
conclusion = (
    "Reject the null hypothesis. There is a significant association between the type of device and customer satisfaction level."
    if chi2_stat > critical_value
    else "Do not reject the null hypothesis. There is no significant association between the type of device and customer satisfaction level."
)
print(f"\nConclusion: {conclusion}")


Conclusion: Do not reject the null hypothesis. There is no significant association between the type of device and customer satisfaction level.


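# **Conclusion** : At the 5% significance level, the chi-square statistic (≈ 5.64) is below the critical value (≈ 9.49) and the p-value (≈ 0.228) exceeds 0.05, so we fail to reject the null hypothesis. There is no statistically significant association between the type of device and customer satisfaction level.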