In [61]:
import pandas as pd
from scipy.stats import chi2_contingency


# Location (local path or URL) of the demographics CSV.
# The notebook was saved with the value removed, leaving `url = ` followed only by
# a comment, which is a SyntaxError. Fill in the real location before running.
url = None  # TODO: demographics csv here
if url is None:
    # Fail with an actionable message instead of an opaque parser/IO error.
    raise ValueError("Set `url` to the path or URL of the demographics CSV before running this cell.")
df = pd.read_csv(url)

In [62]:

# Age analysis: distribution of age bands across experiment groups.

# Map each survey age band to the same label without the " anos" suffix.
# Values of df['age'] that are not keys of this mapping become NaN in 'age_group'.
age_mapping = {
    f'{band} anos': band
    for band in ('18-21', '22-25', '26-29', '30-33', '34-37', '38-40', 'Mais de 40')
}
df['age_group'] = df['age'].map(age_mapping)

# Rows: experiment_type, columns: age_group.
contingency_table = pd.crosstab(index=df['experiment_type'], columns=df['age_group'])

# Chi-square test on the contingency table built above.
chi2, p, dof, expected = chi2_contingency(contingency_table)

print(f'Chi-square Statistic: {chi2}')
print(f'P-value: {p}')
print(contingency_table)


Chi-square Statistic: 52.78111399558882
P-value: 0.006266133657895473
age_group        18-21  22-25  26-29  30-33  34-37  38-40  Mais de 40
experiment_type                                                      
0                   11     30     11     10     11      5           0
1                    9     18     17      7      5      3           9
2                   13     23     11     15      3      2           3
3                    6     20     11     15      9      4           2
4                    9     26      9     14      6      2           4
5                   16     16     24     12      6      3           0


In [63]:
# Gender analysis: distribution of reported gender across experiment groups.

# Rows: experiment_type, columns: gender.
contingency_table = pd.crosstab(index=df['experiment_type'], columns=df['gender'])

# Chi-square test on the contingency table built above.
chi2, p, dof, expected = chi2_contingency(contingency_table)

# Report the statistic and p-value, then the raw counts.
print(f"Chi-square Statistic: {chi2}\nP-value: {p}")
print(contingency_table)


Chi-square Statistic: 9.399216900690627
P-value: 0.49468119559862234
gender           Feminino  Masculino  Outro
experiment_type                            
0                      78          0      0
1                      64          3      1
2                      68          2      0
3                      66          1      0
4                      69          1      0
5                      75          2      0


In [64]:
# Education analysis: distribution of education level across experiment groups,
# followed by the mean years of study per group.

# Education levels in ascending order, mapped to the years of study used for the
# group averages. This one mapping drives BOTH the column order of the printed
# table and the years-of-study conversion, so the two cannot disagree on spelling.
years_of_study = {
    'Ensino Médio incompleto': 10,
    'Ensino Médio completo': 12,
    'Ensino superior incompleto': 14,
    'Ensino superior completo': 16,
    'Pós graduação incompleta': 18,
    'Pós graduação completa': 20
}
new_order = list(years_of_study)

# The postgraduate labels were previously spelled both with and without a hyphen
# ('Pós-graduação' vs 'Pós graduação'), so one of the two lookups silently
# produced NaN. Normalise to the un-hyphenated spelling before any lookup.
# NOTE(review): which spelling the CSV actually uses is not visible here; this
# normalisation makes the cell correct for either.
education = df['education'].str.replace('Pós-graduação', 'Pós graduação', regex=False)

# Surface labels that have no years-of-study value instead of dropping them silently.
unmapped = sorted(set(education.dropna()) - set(new_order))
if unmapped:
    print(f"Warning: education labels without a years-of-study value: {unmapped}")

contingency_table = pd.crosstab(df['experiment_type'], education)

# chi-square test
# Run on the table as observed (before reindexing): an all-zero column added by
# the reindex below would give zero expected frequencies and make the test fail.
chi2, p, dof, expected = chi2_contingency(contingency_table)

print(f"Chi-square Statistic: {chi2}")
print(f"P-value: {p}")

# Display columns in ascending education order; a level nobody reported is a
# count of 0, not a missing value.
contingency_table = contingency_table.reindex(new_order, axis=1, fill_value=0)
print(contingency_table)

### ---- convert to years of study

df['years_of_study'] = education.map(years_of_study)

# Mean years of study per experiment group (NaN entries are skipped by mean()).
average_years_by_group = df.groupby('experiment_type')['years_of_study'].mean()

print(average_years_by_group)

Chi-square Statistic: 29.081486938621417
P-value: 0.2605239697376398
education        Ensino Médio incompleto  Ensino Médio completo  \
experiment_type                                                   
0                                      0                     19   
1                                      3                     14   
2                                      0                     10   
3                                      0                     14   
4                                      2                     10   
5                                      2                     16   

education        Ensino superior incompleto  Ensino superior completo  \
experiment_type                                                         
0                                        14                        28   
1                                        23                        18   
2                                        25                        17   
3                            

In [65]:
# Marital status analysis: distribution across experiment groups, plus the
# percentage of single ('Solteiro(a)') respondents in each group.

# Note: the source column name carries a trailing space ('marital status ').
contingency_table = pd.crosstab(index=df['experiment_type'], columns=df['marital status '])

# Chi-square test on the contingency table built above.
chi2, p, dof, expected = chi2_contingency(contingency_table)

print("Chi-square Statistic:", chi2)
print("P-value:", p)

print(contingency_table)

#------- single %

# The right-hand side is evaluated before the new column exists, so the row
# totals cover only the count columns. After this line the table also carries
# the percentage column.
contingency_table['Solteiro_Percent'] = (
    contingency_table['Solteiro(a)']
    .div(contingency_table.sum(axis=1))
    .mul(100)
)

percentage_solteiro = contingency_table['Solteiro_Percent']
print(percentage_solteiro)

Chi-square Statistic: 18.2888507139887
P-value: 0.5683866783759819
marital status   Casado(a)  Outro  Separado(a)  Solteiro(a)  União Estável
experiment_type                                                           
0                       15      2            1           52              8
1                       20      0            1           37             10
2                       17      0            0           47              6
3                       17      2            0           42              6
4                       15      1            0           51              3
5                       18      0            2           50              7
experiment_type
0    66.666667
1    54.411765
2    67.142857
3    62.686567
4    72.857143
5    64.935065
Name: Solteiro_Percent, dtype: float64


In [66]:
# Movie enthusiast analysis: distribution of answers across experiment groups,
# plus the mean numeric score per group.

# Rows: experiment_type, columns: 'movie enthusiast' answer.
contingency_table = pd.crosstab(index=df['experiment_type'], columns=df['movie enthusiast'])
chi2, p, dof, expected = chi2_contingency(contingency_table)

print("Chi-square Statistic:", chi2)
print("P-value:", p)
print(contingency_table)

# Answers listed from strongest agreement to strongest disagreement; the first
# gets 7 and each following answer one point less, down to 1.
likert_scale = {
    answer: 7 - position
    for position, answer in enumerate([
        'Concordo Totalmente',
        'Concordo Moderadamente',
        'Concordo Pouco',
        'Nem Concordo, Nem Discordo',
        'Discordo Pouco',
        'Discordo Moderadamente',
        'Discordo Totalmente',
    ])
}

# Answers that are not keys of likert_scale become NaN in the score column.
df['enthusiast_score'] = df['movie enthusiast'].map(likert_scale)

# get average scores
average_scores = df['enthusiast_score'].groupby(df['experiment_type']).mean()

print(average_scores)

Chi-square Statistic: 25.024960507519317
P-value: 0.7238179407225658
movie enthusiast  Concordo Moderadamente  Concordo Pouco  Concordo Totalmente  \
experiment_type                                                                 
0                                     23              20                   13   
1                                     17              14                   15   
2                                     25              18                   14   
3                                     28              12                   12   
4                                     28              13                   16   
5                                     25              15                   19   

movie enthusiast  Discordo Moderadamente  Discordo Pouco  Discordo Totalmente  \
experiment_type                                                                 
0                                      2               3                    2   
1                                      

In [67]:
# Trust analysis: distribution of answers across experiment groups, plus the
# mean numeric score per group.

# Rows: experiment_type, columns: 'trust' answer.
contingency_table = pd.crosstab(index=df['experiment_type'], columns=df['trust'])

# Chi-square test on the contingency table built above.
chi2, p, dof, expected = chi2_contingency(contingency_table)

print("Chi-square Statistic:", chi2)
print("P-value:", p)
print(contingency_table)

# Pair each answer with its score: 7 for the first label, counting down to 1.
likert_scale = dict(zip(
    (
        'Concordo Totalmente',
        'Concordo Moderadamente',
        'Concordo Pouco',
        'Nem Concordo, Nem Discordo',
        'Discordo Pouco',
        'Discordo Moderadamente',
        'Discordo Totalmente',
    ),
    range(7, 0, -1),
))

# Answers that are not keys of likert_scale become NaN in the score column.
df['trust_score'] = df['trust'].map(likert_scale)

# Mean trust score per experiment group.
average_scores = df['trust_score'].groupby(df['experiment_type']).mean()

print(average_scores)

Chi-square Statistic: 33.58332224452476
P-value: 0.2977954985012616
trust            Concordo Moderadamente  Concordo Pouco  Concordo Totalmente  \
experiment_type                                                                
0                                    30              17                   15   
1                                    24              21                    9   
2                                    27              18                   16   
3                                    28              18                   16   
4                                    33               7                   17   
5                                    25              23                   15   

trust            Discordo Moderadamente  Discordo Pouco  Discordo Totalmente  \
experiment_type                                                                
0                                     3               3                    2   
1                                     1            