https://epiren.medium.com/p-hacking-for-beginners-996d0e8f5094

In [3]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency
from scipy.stats import fisher_exact

def analyze_data(counts_multiplier):
    """Run Fisher's Exact test on a fixed 2x2 exposure/illness table.

    The base cell counts are 4 (Ate, Ill), 1 (Ate, Not Ill),
    1 (Did Not Eat, Ill) and 2 (Did Not Eat, Not Ill); every cell is
    multiplied by ``counts_multiplier``. Scaling all cells by the same
    factor leaves the sample odds ratio unchanged while the p-value
    changes with the sample size.

    The result and the contingency table are printed to stdout.

    Args:
        counts_multiplier: Factor applied to every cell count.

    Returns:
        A ``(odds_ratio, p_value)`` tuple of floats, or ``None`` when the
        pivoted table is not 2x2 and the test is skipped.
    """
    # Build the long-format data: one row per (exposure, status) cell.
    exposure = np.repeat(["Ate", "Did Not Eat"], repeats=2)
    status = np.tile(["Ill", "Not Ill"], 2)
    count = np.array([4, 1, 1, 2]) * counts_multiplier
    data = pd.DataFrame({"exposure": exposure, "status": status, "count": count})

    # pivot_table already sums duplicate (exposure, status) cells, so no
    # separate groupby step is needed. The string alias "sum" is used instead
    # of np.sum because passing the NumPy callable is deprecated in recent
    # pandas and emits a FutureWarning on every call.
    contingency_table = pd.pivot_table(
        data,
        values="count",
        index="exposure",
        columns="status",
        aggfunc="sum",
    )

    # Fisher's Exact test (as used here) requires a 2x2 table.
    if contingency_table.shape != (2, 2):
        print("Contingency table is not 2x2, Fisher's Exact test cannot be performed.")
        return None

    oddsratio, p_value = fisher_exact(contingency_table)

    # Print the result
    print(f"Fisher's Exact test result with {counts_multiplier} multiplier:")
    print(f"Odds ratio: {oddsratio}, p-value: {p_value}")
    print(contingency_table)
    print("\n")

    return float(oddsratio), float(p_value)

# Repeat the analysis while scaling every cell count by the multiplier:
#   1 -> 5 cases and 3 controls
#   2 -> 10 cases and 6 controls
#   3 -> 15 cases and 9 controls
#   4 -> 20 cases and 12 controls
for multiplier in (1, 2, 3, 4):
    analyze_data(multiplier)


Fisher's Exact test result with 1 multiplier:
Odds ratio: 8.0, p-value: 0.4642857142857142
status       Ill  Not Ill
exposure                 
Ate            4        1
Did Not Eat    1        2


Fisher's Exact test result with 2 multiplier:
Odds ratio: 8.0, p-value: 0.11813186813186813
status       Ill  Not Ill
exposure                 
Ate            8        2
Did Not Eat    2        4


Fisher's Exact test result with 3 multiplier:
Odds ratio: 8.0, p-value: 0.03605419180361972
status       Ill  Not Ill
exposure                 
Ate           12        3
Did Not Eat    3        6


Fisher's Exact test result with 4 multiplier:
Odds ratio: 8.0, p-value: 0.02127324320824345
status       Ill  Not Ill
exposure                 
Ate           16        4
Did Not Eat    4        8




  contingency_table = pd.pivot_table(summarized_data, values='count', index='exposure', columns='status', aggfunc=np.sum)
  contingency_table = pd.pivot_table(summarized_data, values='count', index='exposure', columns='status', aggfunc=np.sum)
  contingency_table = pd.pivot_table(summarized_data, values='count', index='exposure', columns='status', aggfunc=np.sum)
  contingency_table = pd.pivot_table(summarized_data, values='count', index='exposure', columns='status', aggfunc=np.sum)
