In [None]:
import pandas as pd
import numpy as np
from statsmodels.stats.contingency_tables import StratifiedTable, Table2x2

In [None]:
# Load the study dataset from the CSV in the working directory; the cells
# below read its 'married', 'healthy' and 'ageclass' columns.
df = pd.read_csv(filepath_or_buffer='relativerisk.csv')

In [5]:
# Cross-tabulate 'married' (rows) against 'healthy' (columns).
# margins=True appends the 'All' row and column holding the totals.
crosstab = pd.crosstab(index=df['married'], columns=df['healthy'], margins=True)

# Header line followed by the table itself, each on its own line.
print("Crosstab between married and healthy:", crosstab, sep="\n")

Crosstab between married and healthy:
healthy    0     1   All
married                 
0        192  1104  1296
1        167  1537  1704
All      359  2641  3000


In [7]:
# Row percentages: divide every row by its own total (the 'All' column), so
# each row shows the healthy split within that 'married' group and sums to 100.
row_totals = crosstab['All']
crosstab_perc = crosstab.divide(row_totals, axis='index').mul(100)
print("\nPercentages within married:", crosstab_perc, sep="\n")


Percentages within married:
healthy          0          1    All
married                             
0        14.814815  85.185185  100.0
1         9.800469  90.199531  100.0
All      11.966667  88.033333  100.0


In [11]:
# Pull the four cell counts out of the crosstab.
# First label = married (0/1), second label = healthy (0/1).
unmarried_unhealthy, unmarried_healthy = crosstab.loc[0, 0], crosstab.loc[0, 1]
married_unhealthy, married_healthy = crosstab.loc[1, 0], crosstab.loc[1, 1]

# 2x2 contingency table: rows = unmarried / married, columns = unhealthy / healthy.
table = np.array([
    [unmarried_unhealthy, unmarried_healthy],
    [married_unhealthy, married_healthy],
])

# Wrap the counts in a statsmodels Table2x2 to obtain the odds ratio and its
# confidence interval (default settings of oddsratio_confint()).
table2x2 = Table2x2(table)
oddsratio = table2x2.oddsratio
oddsratio_confint = table2x2.oddsratio_confint()
print(
    f"\nOdds Ratio: {oddsratio}",
    f"95% Confidence Interval for Odds Ratio: {oddsratio_confint}",
    sep="\n",
)

# Relative risk is computed by hand from the same counts.
# Group sizes are the row sums of the 2x2 table.
unmarried_total, married_total = (
    unmarried_unhealthy + unmarried_healthy,
    married_unhealthy + married_healthy,
)

# Proportion unhealthy within each group.
prob_unmarried_unhealthy = unmarried_unhealthy / unmarried_total
prob_married_unhealthy = married_unhealthy / married_total

# Relative risk of being unhealthy: unmarried relative to married.
relative_risk = prob_unmarried_unhealthy / prob_married_unhealthy
print(f"\nRelative Risk: {relative_risk}")


Odds Ratio: 1.6006248372819578
95% Confidence Interval for Odds Ratio: (1.2828176176769495, 1.9971661087438233)

Relative Risk: 1.511643379906853


In [12]:
import math

# Standard error used for the relative-risk interval: each group contributes
# (1 - p) / (n * p), with p its unhealthy proportion and n its size. The square
# root of the sum is applied on the log scale below.
var_term_unmarried = (1 - prob_unmarried_unhealthy) / (unmarried_total * prob_unmarried_unhealthy)
var_term_married = (1 - prob_married_unhealthy) / (married_total * prob_married_unhealthy)
se_relative_risk = math.sqrt(var_term_unmarried + var_term_married)

# 95% interval: RR * exp(-1.96 * SE) and RR * exp(+1.96 * SE).
ci_lower, ci_upper = (
    relative_risk * math.exp(sign * 1.96 * se_relative_risk) for sign in (-1, 1)
)

print(f"95% Confidence Interval for Relative Risk: ({ci_lower}, {ci_upper})")


95% Confidence Interval for Relative Risk: (1.2445729778426209, 1.8360238802365885)


In [13]:
# Adjust for ageclass: compute the married x healthy odds ratio separately
# within every ageclass and collect the per-stratum 2x2 count tables in
# `tables` for the pooled analysis.

# Missing ageclass values are removed before sorting. sorted() gives an
# arbitrary order when NaN is present, and `df['ageclass'] == NaN` never
# matches any row, so a NaN "stratum" would only yield an empty crosstab and a
# misleading "does not have a full 2x2 table" message.
ageclass_col = df['ageclass']
ageclasses = sorted(ageclass_col.dropna().unique())
n_missing_ageclass = int(ageclass_col.isna().sum())
tables = []

print("\nOdds Ratios in each ageclass:")
if n_missing_ageclass:
    # Say explicitly what was left out instead of hiding it.
    print(f"{n_missing_ageclass} rows have no ageclass and are excluded from the stratified analysis.")

for age in ageclasses:
    df_age = df[df['ageclass'] == age]
    ct = pd.crosstab(df_age['married'], df_age['healthy'])

    # A stratum missing a married or healthy level cannot form a 2x2 table.
    if ct.shape != (2, 2):
        print(f"Ageclass {age} does not have a full 2x2 table and will be skipped.")
        continue

    # Select by label so the layout is always
    # [[unmarried_unhealthy, unmarried_healthy], [married_unhealthy, married_healthy]].
    table_age = ct.loc[[0, 1], [0, 1]].to_numpy()
    tables.append(table_age)

    table2x2_age = Table2x2(table_age)
    or_age = table2x2_age.oddsratio
    or_ci_age = table2x2_age.oddsratio_confint()
    print(f"Ageclass {age}: Odds Ratio = {or_age}, 95% CI = {or_ci_age}")


Odds Ratios in each ageclass:
Ageclass 1.0: Odds Ratio = 2.324856439704676, 95% CI = (1.5105171467757021, 3.578216557669361)
Ageclass 2.0: Odds Ratio = 1.7349474219810042, 95% CI = (1.2090900247150256, 2.4895107026856724)
Ageclass 3.0: Odds Ratio = 2.3517705589698568, 95% CI = (1.2537682076774148, 4.411361468706528)
Ageclass 4.0: Odds Ratio = 1.145278450363196, 95% CI = (0.7029006744745528, 1.8660712338152807)


In [14]:
# Combine the per-ageclass 2x2 tables collected in `tables` into a single
# stratified table.
mh_table = StratifiedTable(tables)

# Pooled (Mantel-Haenszel) odds ratio across the strata, with its confidence
# interval at the default settings of oddsratio_pooled_confint().
mh_or = mh_table.oddsratio_pooled
mh_or_confint = mh_table.oddsratio_pooled_confint()
print(
    f"\nMantel-Haenszel common Odds Ratio: {mh_or}",
    f"95% Confidence Interval: {mh_or_confint}",
    sep="\n",
)


Mantel-Haenszel common Odds Ratio: 1.780649273613681
95% Confidence Interval: (1.4217473900860704, 2.230151331896575)


In [20]:
import pandas as pd
import numpy as np
import math
from statsmodels.stats.contingency_tables import Table2x2
from tabulate import tabulate  # Import tabulate

# Read the dataset again (same CSV as the load cell near the top of the notebook).
df = pd.read_csv(filepath_or_buffer='relativerisk.csv')

# married (rows) x healthy (columns) counts; margins=True adds the 'All' totals.
crosstab = pd.crosstab(index=df['married'], columns=df['healthy'], margins=True)

# Build the 2x2 count table straight from the crosstab labels:
# rows = married 0/1, columns = healthy 0/1 (the 'All' margins are left out).
table = np.array([[crosstab.loc[row, col] for col in (0, 1)] for row in (0, 1)])

# Name the individual cells; the relative-risk section below uses them.
(unmarried_unhealthy, unmarried_healthy), (married_unhealthy, married_healthy) = table

# statsmodels 2x2 table object
table2x2 = Table2x2(table)

# Odds ratio with its confidence interval, plus the two bounds as separate names.
oddsratio = table2x2.oddsratio
oddsratio_confint = table2x2.oddsratio_confint()
oddsratio_ci_lower = oddsratio_confint[0]
oddsratio_ci_upper = oddsratio_confint[1]

# Relative risk computed by hand from the four cell counts.
# Group sizes are the row sums of the 2x2 table.
unmarried_total, married_total = (
    unmarried_unhealthy + unmarried_healthy,
    married_unhealthy + married_healthy,
)

# Proportion unhealthy within each group.
prob_unmarried_unhealthy = unmarried_unhealthy / unmarried_total
prob_married_unhealthy = married_unhealthy / married_total

# Relative risk of being unhealthy: unmarried relative to married.
relative_risk = prob_unmarried_unhealthy / prob_married_unhealthy

# Standard error used for the interval: one (1 - p) / (n * p) term per group.
var_term_unmarried = (1 - prob_unmarried_unhealthy) / (unmarried_total * prob_unmarried_unhealthy)
var_term_married = (1 - prob_married_unhealthy) / (married_total * prob_married_unhealthy)
se_relative_risk = math.sqrt(var_term_unmarried + var_term_married)

# 95% interval on the log scale: RR * exp(-/+ 1.96 * SE).
z_times_se = 1.96 * se_relative_risk
ci_lower = relative_risk * math.exp(-z_times_se)
ci_upper = relative_risk * math.exp(z_times_se)

# Gather every estimate as (column label, value) pairs, in display order.
result_columns = (
    ("Odds Ratio", oddsratio),
    ("Odds Ratio CI Lower", oddsratio_ci_lower),
    ("Odds Ratio CI Upper", oddsratio_ci_upper),
    ("Relative Risk", relative_risk),
    ("Relative Risk CI Lower", ci_lower),
    ("Relative Risk CI Upper", ci_upper),
)

# Each value is wrapped in a one-element list so the DataFrame has a single row.
results = {label: [value] for label, value in result_columns}
results_df = pd.DataFrame(data=results)

# Render the one-row table as an ASCII grid, using the column names as headers.
results_grid = tabulate(results_df, tablefmt='grid', headers='keys')
print("Calculated Results:", results_grid, sep="\n")

# Write the same table to CSV without the index column.
results_df.to_csv(path_or_buf="analysis_results.csv", index=False)
print("Results saved to 'analysis_results.csv'")


Calculated Results:
+----+--------------+-----------------------+-----------------------+-----------------+--------------------------+--------------------------+
|    |   Odds Ratio |   Odds Ratio CI Lower |   Odds Ratio CI Upper |   Relative Risk |   Relative Risk CI Lower |   Relative Risk CI Upper |
|  0 |      1.60062 |               1.28282 |               1.99717 |         1.51164 |                  1.24457 |                  1.83602 |
+----+--------------+-----------------------+-----------------------+-----------------+--------------------------+--------------------------+
Results saved to 'analysis_results.csv'
