In [1]:
# Array and dataframe handling.
import pandas as pd
import numpy as np
# Statistics and modeling.
from scipy import stats
import statsmodels.formula.api as smf
# Utility.
from lib.utils import clean_cats, stepwise, classify, mixed

## Load and combine data

In [2]:
# Columns handed to clean_cats for each source. clean_cats is a project helper
# from lib.utils; presumably it normalises these categorical columns -- confirm there.
ANCHOR_CAT_COLS = ("sat5", "sex_gen", "val1i3", "sex8")
PARTNER_CAT_COLS = ("psat5", "psex", "pval1i3")

# Read the two anchor files and the partner file.
anchor_p1 = pd.read_stata("dta/Data/Stata/anchor13_capi.dta")
anchor_p2 = pd.read_stata("dta/Data/Stata/anchor13_cati.dta")
partner_df = pd.read_stata("dta/Data/Stata/partner13.dta")

# Pre cleaning. Each call receives its own fresh list of column names.
anchor_p1_cleaned = clean_cats(anchor_p1, list(ANCHOR_CAT_COLS))
anchor_p2_cleaned = clean_cats(anchor_p2, list(ANCHOR_CAT_COLS))
partner_df_cleaned = clean_cats(partner_df, list(PARTNER_CAT_COLS))

# Combine anchors: stack both parts row-wise and check that no row went missing.
anchor_parts = [anchor_p1_cleaned, anchor_p2_cleaned]
anchor_df = pd.concat(anchor_parts)
expected_rows = sum(len(part) for part in anchor_parts)
assert len(anchor_df) == expected_rows, "Length of combined df not sum of single dfs!"

# Join anchors with partners: inner join on "id", so only anchors with a matching
# partner row remain. Overlapping partner column names receive the "_p" suffix.
partners_by_id = partner_df_cleaned.set_index("id")
data_df = anchor_df.join(partners_by_id, on="id", how="inner", rsuffix="_p")
assert len(data_df) == 2385, "There should be 2385 partner-anchors combos!"

def _by_gender(frame, own_col, partner_col):
    """Split an anchor/partner column pair into (female, male) value arrays.

    The female array takes ``own_col`` where ``sex_gen == 2`` and ``partner_col``
    otherwise; the male array takes ``own_col`` where ``sex_gen == 1`` and
    ``partner_col`` otherwise.
    """
    female_values = np.where(frame["sex_gen"] == 2, frame[own_col], frame[partner_col])
    male_values = np.where(frame["sex_gen"] == 1, frame[own_col], frame[partner_col])
    return female_values, male_values


# Scores at or above this value are flagged as traditional, below it as egalitarian.
TRADITIONAL_MIN = 3

# Start from a copy so data_df keeps the untouched join result.
transformed_df = data_df.copy()

# Filter
# Remove same sex couples, remembering the row counts before and after.
# NOTE(review): a missing sex_gen/psex would compare as "different" here and only
# be removed by the dropna below, so n_hetero may include such rows -- confirm.
n_all = len(transformed_df)
different_sex = transformed_df["sex_gen"] != transformed_df["psex"]
transformed_df = transformed_df[different_sex]
n_hetero = len(transformed_df)
n_homo = n_all - n_hetero

# Create variables.
# Sexual satisfaction by gender, plus the female-minus-male difference.
transformed_df = transformed_df.dropna(subset=["psex", "sex_gen", "sat5", "psat5"])
transformed_df["fsat5"], transformed_df["msat5"] = _by_gender(transformed_df, "sat5", "psat5")
transformed_df["difsat5"] = transformed_df["fsat5"] - transformed_df["msat5"]

# Traditional values by gender, plus the female-minus-male difference.
transformed_df = transformed_df.dropna(subset=["val1i3", "pval1i3"])
transformed_df["fval1i3"], transformed_df["mval1i3"] = _by_gender(transformed_df, "val1i3", "pval1i3")
transformed_df["difval1i3"] = transformed_df["fval1i3"] - transformed_df["mval1i3"]

# Age by gender, stored as integers.
transformed_df = transformed_df.dropna(subset=["age", "page"])
female_age, male_age = _by_gender(transformed_df, "age", "page")
transformed_df["fage"] = female_age.astype(int)
transformed_df["mage"] = male_age.astype(int)

# Categorisation of values by gender (boolean flags).
transformed_df["ftrad"] = transformed_df["fval1i3"].ge(TRADITIONAL_MIN)
transformed_df["mtrad"] = transformed_df["mval1i3"].ge(TRADITIONAL_MIN)

transformed_df["fegal"] = transformed_df["fval1i3"].lt(TRADITIONAL_MIN)
transformed_df["megal"] = transformed_df["mval1i3"].lt(TRADITIONAL_MIN)

# Relationship combination of values, computed row by row with the project
# helpers classify and mixed (lib.utils).
transformed_df["both"] = transformed_df.apply(classify, axis="columns")
transformed_df["both2"] = transformed_df.apply(mixed, axis="columns")

# Some more cleaning: sex8 must be present and is stored as an integer.
transformed_df = transformed_df.dropna(subset=["sex8"])
sex8_as_int = transformed_df["sex8"].astype(int)
transformed_df["sex8"] = sex8_as_int

# NOTE(review): this count is taken before the reldur filter that builds final_df.
print(f"{len(transformed_df)} partner-anchors")

# More cleaning: discard rows whose relationship duration carries the
# "-7 Incomplete data" label, then store the remaining durations as integers.
has_reldur = transformed_df["reldur"] != "-7 Incomplete data"
final_df = transformed_df.loc[has_reldur].copy()
final_df["reldur"] = final_df["reldur"].astype(int)

1826 partner-anchors


## Individual regressions

In [3]:
# Female satisfaction on the woman's own egalitarian flag plus controls.
# stepwise is a project helper (lib.utils); judging by the cell output it
# prints one OLS summary per step.
female_terms = ["fegal", "reldur", "fage", "sex8"]
stepwise("fsat5", female_terms, final_df)

Step 0:
                            OLS Regression Results                            
Dep. Variable:                  fsat5   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     2.281
Date:                Mon, 28 Oct 2024   Prob (F-statistic):              0.131
Time:                        22:34:38   Log-Likelihood:                -4163.5
No. Observations:                1812   AIC:                             8331.
Df Residuals:                    1810   BIC:                             8342.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept         6.7136      0.085   

In [4]:
# Male satisfaction on the man's own egalitarian flag plus controls.
# stepwise is a project helper (lib.utils); judging by the cell output it
# prints one OLS summary per step.
male_terms = ["megal", "reldur", "mage", "sex8"]
stepwise("msat5", male_terms, final_df)

Step 0:
                            OLS Regression Results                            
Dep. Variable:                  msat5   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.001
Method:                 Least Squares   F-statistic:                   0.06181
Date:                Mon, 28 Oct 2024   Prob (F-statistic):              0.804
Time:                        22:34:38   Log-Likelihood:                -4270.2
No. Observations:                1812   AIC:                             8544.
Df Residuals:                    1810   BIC:                             8555.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept         6.3709      0.093   

## Relationship regressions

In [5]:
# Female satisfaction on the couple's value combination ("both"), treatment-coded
# against the "egal/egal" reference level, plus controls.
couple_terms_female = [
    'C(both, Treatment(reference="egal/egal"))',
    "reldur",
    "fage",
    "sex8",
]
stepwise("fsat5", couple_terms_female, final_df)

Step 0:
                            OLS Regression Results                            
Dep. Variable:                  fsat5   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                    0.7811
Date:                Mon, 28 Oct 2024   Prob (F-statistic):              0.504
Time:                        22:34:38   Log-Likelihood:                -4163.5
No. Observations:                1812   AIC:                             8335.
Df Residuals:                    1808   BIC:                             8357.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                                               coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------

In [6]:
# Male satisfaction on the couple's value combination ("both"), treatment-coded
# against the "egal/egal" reference level, plus controls.
couple_terms_male = [
    'C(both, Treatment(reference="egal/egal"))',
    "reldur",
    "mage",
    "sex8",
]
stepwise("msat5", couple_terms_male, final_df)

Step 0:
                            OLS Regression Results                            
Dep. Variable:                  msat5   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.001
Method:                 Least Squares   F-statistic:                    0.2708
Date:                Mon, 28 Oct 2024   Prob (F-statistic):              0.846
Time:                        22:34:38   Log-Likelihood:                -4269.8
No. Observations:                1812   AIC:                             8548.
Df Residuals:                    1808   BIC:                             8570.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                                               coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------