In [105]:
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np

def create_gender_boxplots(test_type, csv_file, correct_answers):

    # Data is sorted.
    dataframe= pd.read_csv(csv_file)    
    dataframe_sorted= dataframe.sort_values("gender", ascending= True)
    female_dataframe= dataframe_sorted[dataframe_sorted["gender"]== "f"]
    male_dataframe= dataframe_sorted[dataframe_sorted["gender"]== "m"]

    # Graph is plotted.
    fig, ax= plt.subplots()
    ax.boxplot([female_dataframe[correct_answers], male_dataframe[correct_answers]])
    ax.set_xlabel("Gender")
    ax.set_ylabel(f"Number of Correct Answers in {test_type} Test")
    ax.set_xticklabels(["Female", "Male"])
    ax.set_title(f"Spread of Correct Answers of Male and Female {test_type} Test Takers")
    plt.show()    
    save= input("Type yes if you want to save your boxplot.")
    
    if save== "yes":
        fig.savefig(f"{test_type} Test Gender Boxplot")
        
    else:
        return
   

def t_test_between_genders(test_type, csv_file, correct_answers):
    
    t_test_table= {
        "Test": [],
        "Mean score male": [],
        "Male standard error": [],
        "Mean score female": [],
        "Female standard error": [],
        "Difference in score": [],
        "P value": []
    }
    # Layout is similar to the above function.
    dataframe= pd.read_csv(csv_file)    
    dataframe_sorted= dataframe.sort_values("gender", ascending= True)
    female_dataframe= dataframe_sorted[dataframe_sorted["gender"]== "f"]
    male_dataframe= dataframe_sorted[dataframe_sorted["gender"]== "m"]

    female_mean= female_dataframe[correct_answers].mean()
    male_mean= male_dataframe[correct_answers].mean()
    difference= female_mean- male_mean
    female_se= stats.sem(female_dataframe[correct_answers])
    male_se= stats.sem(male_dataframe[correct_answers])
    t_test= stats.ttest_ind(female_dataframe[correct_answers].values, male_dataframe[correct_answers].values)
    print(f"{test_type} Test p value is {t_test.pvalue}.")
    
    if t_test.pvalue<= 0.05:
        print("Result is significant a the 0.05 level.")
        
    else:
        print("The result is not significant and we fail to detect a difference in means at the 0.05 level.")

    # Table for t-test information is made.
    t_test_table["Test"].append(test_type)
    t_test_table["Mean score male"].append(male_mean)
    t_test_table["Male standard error"].append(male_se)
    t_test_table["Mean score female"].append(female_mean)
    t_test_table["Female standard error"].append(female_se)
    t_test_table["Difference in score"].append(difference)
    t_test_table["P value"].append(t_test.pvalue)
    table= input("Print t test table?")
            
    if table== "yes":
        t_test_table_df= pd.DataFrame(t_test_table)
        final_table= t_test_table_df.pivot_table(values= ["Mean score male", "Male standard error", "Mean score female", "Female standard error", "Difference in score", "P value"], index= "Test")
        display(final_table)
        save= input("Type yes if you want to save your table.")
    
        if save== "yes":
            final_table.to_csv("t-test summary table.csv")
            
    # Dot plot for t-test is created.        
    fig = plt.figure()
    ax = fig.add_subplot()
    ax.plot(female_dataframe["gender"], female_dataframe[correct_answers], "o", alpha= 0.3, color= "red")
    ax.plot(male_dataframe["gender"], male_dataframe[correct_answers], "o", alpha= 0.3, color= "blue")
    ax.errorbar("f", female_mean,  yerr= female_se,  marker= "_",  color= "black")
    ax.errorbar("m", male_mean,  yerr= male_se,  marker= "_",  color= "black")
    ax.set_xlim(-1, 2)
    ax.set_ylabel("Number of correct answers")
    ax.set_xlabel("Gender")
    ax.set_title(f"Number of Correct Answers in {test_type} Test of Males vs Females")
    plt.show()

def stats_analysis():
    
    test_type= ""
    
    # This is asked in order to put the correct title in the graph, as well as to select the correct csv file.
    for i in range (2):
        test_type = input("Please enter maths, memory, or ANS: ")
        
        if test_type== "maths":
            test_type.title()
            csv_file= "Maths Data.csv"
            correct_answers= "correct_answers"

        elif test_type== "memory":
            test_type.title()
            csv_file= "memory Data.csv"
            correct_answers= "n_correct_answers"

        elif test_type== "ans":
            test_type.upper()
            csv_file= ""
            correct_answers= ""

        elif test_type== "quit":
            return
    
        print("Please enter which function you would like to use.")
        function= input("boxplot or t_test")
    
        if function== "boxplot":
            create_gender_boxplots(test_type, csv_file, correct_answers)

        elif function== "t_test":
            t_test_between_genders(test_type, csv_file, correct_answers)

In [106]:
stats_analysis()

Please enter maths, memory, or ANS:  maths


Maths
Please enter which function you would like to use.


boxplot or t_test 
Please enter maths, memory, or ANS:  


Please enter which function you would like to use.


boxplot or t_test 
