In [13]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import chi2_contingency

### Hypothesis 1 

#### H(0): there is no significant relationship between individuals or household members being diagnosed with COVID-19 and changes in personal cleaning behavior, online spending habits, or stockpiling behavior.

#### H(1): there is a significant relationship between individuals or household members being diagnosed with COVID-19 and changes in personal cleaning behavior, online spending habits, or stockpiling behavior.

#### For this hypothesis, we will use the chi-square test to determine whether there is a significant relationship between individuals or household members being diagnosed with COVID-19 and changes in personal cleaning behavior, online spending habits, or stockpiling behavior.

#### The Chi-square test of independence checks whether two variables are likely to be related or not. We have counts for two categorical or nominal variables. We also have an idea that the two variables are not related. The test gives us a way to decide if our idea is plausible or not.

In [14]:
# Load the processed dataset into a DataFrame. The path is relative, so it is
# resolved against the current working directory of the kernel.
data = pd.read_csv("../Dataset/Data_COVID19_Engl_sh_processed.csv")


In [15]:

def chi_square_test_of_independence(data, column1, column2, alpha=0.05):
    """Run a chi-square test of independence between two columns of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both columns.
    column1, column2 : str
        Names of the two categorical columns to cross-tabulate.
    alpha : float, default 0.05
        Significance level the p-value is compared against. The default
        reproduces the previously hard-coded threshold.

    Returns
    -------
    tuple
        ``(chi2, p, dof, expected, p_result)`` where the first four items are
        the values returned by ``scipy.stats.chi2_contingency`` and
        ``p_result`` is a text verdict: "significant, reject the null
        hypothesis" when ``p < alpha``, otherwise "not significant, fail to
        reject the null hypothesis".
    """
    # Observed counts for every (column1 value, column2 value) combination.
    contingency_table = pd.crosstab(data[column1], data[column2])

    chi2, p, dof, expected = chi2_contingency(contingency_table)

    # Strict inequality: a p-value exactly equal to alpha is not significant.
    if p < alpha:
        p_result = "significant, reject the null hypothesis"
    else:
        p_result = "not significant, fail to reject the null hypothesis"
    return chi2, p, dof, expected, p_result

In [17]:
# Behaviour columns that are each tested against the 'cov_n' column.
chi_square_columns = ['sto_n', 'hyg_n', 'int_n']

# One record per tested column; the records become the rows of the results table.
hypo1_table_data = []
for column in chi_square_columns:
    chi2, p, dof, expected, p_result = chi_square_test_of_independence(
        data, column, 'cov_n'
    )
    hypo1_table_data.append(
        {
            'Variable': column,
            'Chi2': chi2,
            'p-value': p,
            'Dof': dof,
            'Result': p_result,
        }
    )

hypo1_table = pd.DataFrame(hypo1_table_data)

# Save the table as CSV, then read the file back so the frame displayed below
# is what was actually written to disk.
hypo1_table.to_csv('../Results/hypo1_table.csv', index=False)
hypo1_df = pd.read_csv('../Results/hypo1_table.csv')
hypo1_df.head(5)


Unnamed: 0,Variable,Chi2,p-value,Dof,Result
0,sto_n,6.088586,0.013606,1,"significant, reject the null hypothesis"
1,hyg_n,18.903093,0.000821,4,"significant, reject the null hypothesis"
2,int_n,6.707053,0.152203,4,"not significant, fail to reject the null hypot..."


In [19]:
from scipy.stats import mannwhitneyu

def man_whitney_test(data, column, alpha=0.05):
    """Compare ``column`` between the ``cov_n == 1`` and ``cov_n == 0`` rows
    of ``data`` with a Mann-Whitney U test.

    Missing values in ``column`` are dropped from each group before testing.
    Without this, a single NaN makes ``mannwhitneyu`` (default NaN policy:
    propagate) return a NaN p-value, which would silently be labelled
    "not significant".

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing ``column`` and a ``cov_n`` column coded 1 / 0.
    column : str
        Name of the column whose values are compared between the two groups.
    alpha : float, default 0.05
        Significance level the p-value is compared against. The default
        reproduces the previously hard-coded threshold.

    Returns
    -------
    tuple
        ``(stats, p, p_result)``: the U statistic and p-value returned by
        ``scipy.stats.mannwhitneyu`` plus a text verdict, "significant, reject
        the null hypothesis" when ``p < alpha``, otherwise "not significant,
        fail to reject the null hypothesis".
    """
    # Split on the diagnosis flag and keep only observed values of `column`.
    diagnosed_values = data.loc[data['cov_n'] == 1, column].dropna()
    non_diagnosed_values = data.loc[data['cov_n'] == 0, column].dropna()

    stats, p = mannwhitneyu(diagnosed_values, non_diagnosed_values)

    # Strict inequality: a p-value exactly equal to alpha is not significant.
    if p < alpha:
        p_result = "significant, reject the null hypothesis"
    else:
        p_result = "not significant, fail to reject the null hypothesis"

    return stats, p, p_result

# NOTE: this cell reuses the names hypo1_table_data / hypo1_table / hypo1_df
# from the chi-square cell, so the chi-square results held in those variables
# are overwritten once this cell runs.
hypo1_table_data = []
for column in chi_square_columns:
    stats, p, p_result = man_whitney_test(data, column)
    hypo1_table_data.append(
        {
            'Variable': column,
            'Stats': stats,
            'p-value': p,
            'Result': p_result,
        }
    )

hypo1_table = pd.DataFrame(hypo1_table_data)

# Save the table as CSV, then read the file back so the frame displayed below
# is what was actually written to disk.
hypo1_table.to_csv('../Results/hypo1_table_mannwhitney.csv', index=False)
hypo1_df = pd.read_csv('../Results/hypo1_table_mannwhitney.csv')
hypo1_df.head(5)


Unnamed: 0,Variable,Stats,p-value,Result
0,sto_n,171535.5,0.010212,"significant, reject the null hypothesis"
1,hyg_n,149924.5,0.645847,"not significant, fail to reject the null hypot..."
2,int_n,141846.0,0.154336,"not significant, fail to reject the null hypot..."


Stockpiling Behavior (sto_n):

Chi-square test: The p-value (0.013606) is less than the significance level, indicating a significant relationship between being diagnosed with COVID-19 and changes in stockpiling behavior. Therefore, the null hypothesis is rejected.
Mann-Whitney U test: The p-value (0.010212) is less than the significance level, indicating a significant difference in stockpiling behavior between individuals/households diagnosed with COVID-19 and those who are not. Therefore, the null hypothesis is rejected.
Personal Hygiene Behavior (hyg_n):

Chi-square test: The p-value (0.000821) is less than the significance level, indicating a significant relationship between being diagnosed with COVID-19 and changes in personal hygiene behavior. Therefore, the null hypothesis is rejected.
Mann-Whitney U test: The p-value (0.645847) is greater than the significance level, indicating no significant difference in personal hygiene behavior between individuals/households diagnosed with COVID-19 and those who are not. Therefore, the null hypothesis is not rejected.
Online Spending Habits (int_n):

Chi-square test: The p-value (0.152203) is greater than the significance level, indicating no significant relationship between being diagnosed with COVID-19 and changes in online spending habits. Therefore, the null hypothesis is not rejected.
Mann-Whitney U test: The p-value (0.154336) is greater than the significance level, indicating no significant difference in online spending habits between individuals/households diagnosed with COVID-19 and those who are not. Therefore, the null hypothesis is not rejected.
In summary:

There is a significant relationship between being diagnosed with COVID-19 and changes in stockpiling behavior (supported by both tests) and personal hygiene behavior (supported by the chi-square test only; the Mann-Whitney U test found no significant difference, p = 0.645847).
However, there is no significant relationship or difference in online spending habits based on COVID-19 diagnosis in either test.
These results suggest that a COVID-19 diagnosis of an individual or household member is associated with changes in stockpiling behavior and, less conclusively, in personal hygiene habits, while online spending habits do not appear to be significantly affected. Note that both tests detect an association or difference, not its direction or cause.


