In [26]:
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

import sys
# Add ".." to the module search path so the project package under `src/` can be
# imported without installing it. The entry is relative, so it resolves against
# the current working directory of the kernel.
# NOTE(review): assumes the notebook is run from a directory one level below the
# repository root — confirm, or install the package instead of patching sys.path.
sys.path.append("..")
from src.constrainedlr.model import ConstrainedLinearRegression

### Load the dataset

In [3]:
# Fetch the scikit-learn diabetes dataset and wrap the feature matrix in a
# DataFrame so that every column carries its feature name.
dataset = load_diabetes()
X = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
y = dataset.target

# Hold out 33% of the rows for testing; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=0,
)

### Fit unconstrained

In [30]:
# Standardise the features, then fit the regressor with no sign constraints.
# set_output(transform="pandas") makes the scaler emit a DataFrame rather than
# a bare array, so column names are still attached when the data reaches "reg".
model = Pipeline(
    steps=[
        ("scaler", StandardScaler().set_output(transform="pandas")),
        ("reg", ConstrainedLinearRegression(alpha=1.0, fit_intercept=True)),
    ]
)
# Kept as the last expression so the cell displays the fitted pipeline.
model.fit(X_train, y=y_train)

In [37]:
# Pair every feature name with the coefficient fitted for it by the "reg" step.
{feature: weight for feature, weight in zip(X.columns, model.named_steps["reg"].coef_)}

{'age': -1.9215663593159094,
 'sex': -8.918073842942393,
 'bmi': 28.379887469475158,
 'bp': 13.246719889044059,
 's1': -23.916134757876332,
 's2': 11.562987807426458,
 's3': -2.9415884691023626,
 's4': 2.6439050108452884,
 's5': 35.48022931279615,
 's6': 1.810491373142501}

### Fit constrained

In [47]:
# Same two-step pipeline as before (scaling, then regression); this time sign
# constraints are supplied when fitting.
model = Pipeline(
    steps=[
        ("scaler", StandardScaler().set_output(transform="pandas")),
        ("reg", ConstrainedLinearRegression(alpha=1.0, fit_intercept=True)),
    ]
)
# Kept as the last expression so the cell displays the fitted pipeline.
model.fit(
    X_train,
    y_train,
    # The "reg__" prefix routes this keyword to the fit() call of the "reg" step.
    # NOTE(review): presumably 1 requests a non-negative and -1 a non-positive
    # coefficient for the named feature — the s3/s4 values shown below are
    # consistent with that; confirm against ConstrainedLinearRegression's docs.
    reg__features_sign_constraints={"s3": 1, "s4": -1},
)

In [48]:
# Map each feature name to its coefficient from the constrained fit.
{feature: weight for feature, weight in zip(X.columns, model.named_steps["reg"].coef_)}

{'age': -2.015221727415579,
 'sex': -8.516259556481547,
 'bmi': 28.325323305631603,
 'bp': 13.177584635396611,
 's1': -32.69510053522447,
 's2': 20.237103508511588,
 's3': 0.002696460032150405,
 's4': -0.0007771233289657967,
 's5': 40.02522281256512,
 's6': 2.034123319794976}