In [None]:
!pip install numpy pandas scikit-learn dowhy causal-learn


Collecting dowhy
  Downloading dowhy-0.14-py3-none-any.whl.metadata (18 kB)
Collecting causal-learn
  Downloading causal_learn-0.1.4.3-py3-none-any.whl.metadata (4.6 kB)
Collecting cvxpy<2.0.0,>=1.7.1 (from dowhy)
  Downloading cvxpy-1.7.5-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (9.5 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Collecting momentchi2 (from causal-learn)
  Downloading momentchi2-0.1.8-py3-none-any.whl.metadata (6.1 kB)
Downloading dowhy-0.14-py3-none-any.whl (403 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m403.1/403.1 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading causal_learn-0.1.4.3-py3-none-any.whl (192 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.0/193.0

In [None]:
import numpy as np
import pandas as pd

np.random.seed(42)
n = 2000

Z = np.random.normal(size=n)              # confounder
X1 = Z + np.random.normal(size=n)
X2 = np.random.normal(size=n)
Y  = 2*X1 + 3*X2 + 1.5*Z + np.random.normal(size=n)
X3 = Y + np.random.normal(size=n)          # anti-causal

df = pd.DataFrame({
    "Z": Z,
    "X1": X1,
    "X2": X2,
    "X3": X3,
    "Y": Y
})


In [None]:
from causallearn.search.ConstraintBased.PC import pc
from causallearn.utils.cit import fisherz

data = df[["Z", "X1", "X2", "X3", "Y"]].values
cg = pc(data, alpha=0.05, indep_test=fisherz)

print(cg.G)


  0%|          | 0/5 [00:00<?, ?it/s]

Graph Nodes:
X1;X2;X3;X4;X5

Graph Edges:
1. X1 --- X2
2. X1 --> X5
3. X2 --> X5
4. X3 --> X5
5. X5 --> X4



In [None]:
from dowhy import CausalModel

selected_features = []

for feature in ["Z", "X1", "X2", "X3"]:
    model = CausalModel(
        data=df,
        treatment=feature,
        outcome="Y",
        common_causes=[c for c in ["Z", "X1", "X2", "X3"] if c != feature]
    )

    estimand = model.identify_effect()
    estimate = model.estimate_effect(
        estimand,
        method_name="backdoor.linear_regression"
    )

    if abs(estimate.value) > 0.2:
        selected_features.append(feature)

print("Causally selected features:", selected_features)




Causally selected features: ['Z', 'X1', 'X2', 'X3']


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

def intervene(df, feature, value):
    df_int = df.copy()
    df_int[feature] = value
    return df_int

baseline_model = LinearRegression().fit(
    df[selected_features], df["Y"]
)

baseline_preds = baseline_model.predict(df[selected_features])

for f in selected_features:
    df_int = intervene(df, f, df[f].mean() + 2)
    preds = baseline_model.predict(df_int[selected_features])
    print(f, "Δ prediction mean:", np.mean(preds - baseline_preds))


Z Δ prediction mean: 1.4289094040218533
X1 Δ prediction mean: 1.9179038037705127
X2 Δ prediction mean: 2.918396120014052
X3 Δ prediction mean: 1.0451477018161208


In [None]:
final_model = LinearRegression()
final_model.fit(df[selected_features], df["Y"])

print("Coefficients:")
for f, c in zip(selected_features, final_model.coef_):
    print(f, round(c, 3))


Coefficients:
Z 0.714
X1 0.959
X2 1.459
X3 0.523
