***13.Linear Regression***

In [1]:
#13.1 Fitting a line
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_diabetes

diabetes = load_diabetes()
X = diabetes.data[:, None, 2]
y = diabetes.target
model = LinearRegression()
model.fit(X, y)
print(model.coef_, model.intercept_)
print(model.predict(X[:5]))

[949.43526038] 152.13348416289617
[210.71003806 103.26219543 194.33703347 141.12476855 117.58857445]


In [2]:
y[0]*1000

np.float64(151000.0)

In [3]:
model.predict(X)[0]*1000

np.float64(210710.03806366966)

In [4]:
model.coef_[0]*1000, model.intercept_*1000

(np.float64(949435.2603840387), np.float64(152133.48416289617))

In [5]:
#13.2 Handling Interactive Effects
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.datasets import load_diabetes
features = load_diabetes().data
target = load_diabetes().target
interaction = PolynomialFeatures(degree=3, interaction_only=True, include_bias=False)
features_interaction = interaction.fit_transform(features)
reg=LinearRegression()
model=reg.fit(features_interaction, target)


In [6]:
features[0]

array([ 0.03807591,  0.05068012,  0.06169621,  0.02187239, -0.0442235 ,
       -0.03482076, -0.04340085, -0.00259226,  0.01990749, -0.01764613])

In [8]:
import numpy as np
interaction_term=np.multiply(features[0, :], features[0, :] )

In [9]:
interaction_term[0]

np.float64(0.001449774650726785)

In [10]:
features_interaction[0]

array([ 3.80759064e-02,  5.06801187e-02,  6.16962065e-02,  2.18723855e-02,
       -4.42234984e-02, -3.48207628e-02, -4.34008457e-02, -2.59226200e-03,
        1.99074862e-02, -1.76461252e-02,  1.92969146e-03,  2.34913899e-03,
        8.32810904e-04, -1.68384979e-03, -1.32583211e-03, -1.65252654e-03,
       -9.87027253e-05,  7.57995581e-04, -6.71892210e-04,  3.12677107e-03,
        1.10849509e-03, -2.24125215e-03, -1.76472040e-03, -2.19956001e-03,
       -1.31376146e-04,  1.00891376e-03, -8.94307718e-04,  1.34944321e-03,
       -2.72842209e-03, -2.14830898e-03, -2.67766754e-03, -1.59932732e-04,
        1.22821638e-03, -1.08869898e-03, -9.67273406e-04, -7.61613149e-04,
       -9.49280028e-04, -5.66989538e-05,  4.35424212e-04, -3.85962852e-04,
        1.53989595e-03,  1.91933723e-03,  1.14638894e-04, -8.80378683e-04,
        7.80373388e-04,  1.51125055e-03,  9.02645403e-05, -6.93193855e-04,
        6.14451539e-04,  1.12506363e-04, -8.64001735e-04,  7.65856754e-04,
       -5.16054199e-05,  

In [1]:
#13.3 Fitting a Non-linear Relationship
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import PolynomialFeatures

cancer = load_breast_cancer()
X = cancer.data
y = cancer.target
poly = PolynomialFeatures(degree=3, include_bias=False)
X_poly = poly.fit_transform(X)
reg = LinearRegression()
model=reg.fit(X_poly, y)


In [3]:
X[0]


array([1.799e+01, 1.038e+01, 1.228e+02, 1.001e+03, 1.184e-01, 2.776e-01,
       3.001e-01, 1.471e-01, 2.419e-01, 7.871e-02, 1.095e+00, 9.053e-01,
       8.589e+00, 1.534e+02, 6.399e-03, 4.904e-02, 5.373e-02, 1.587e-02,
       3.003e-02, 6.193e-03, 2.538e+01, 1.733e+01, 1.846e+02, 2.019e+03,
       1.622e-01, 6.656e-01, 7.119e-01, 2.654e-01, 4.601e-01, 1.189e-01])

In [4]:
X[0]**2

array([3.2364010e+02, 1.0774440e+02, 1.5079840e+04, 1.0020010e+06,
       1.4018560e-02, 7.7061760e-02, 9.0060010e-02, 2.1638410e-02,
       5.8515610e-02, 6.1952641e-03, 1.1990250e+00, 8.1956809e-01,
       7.3770921e+01, 2.3531560e+04, 4.0947201e-05, 2.4049216e-03,
       2.8869129e-03, 2.5185690e-04, 9.0180090e-04, 3.8353249e-05,
       6.4414440e+02, 3.0032890e+02, 3.4077160e+04, 4.0763610e+06,
       2.6308840e-02, 4.4302336e-01, 5.0680161e-01, 7.0437160e-02,
       2.1169201e-01, 1.4137210e-02])

In [5]:
X_poly[0]

array([1.79900000e+01, 1.03800000e+01, 1.22800000e+02, ...,
       2.51701800e-02, 6.50453032e-03, 1.68091427e-03])

In [6]:
#13.4 Reducing Variance with Regularization
from sklearn.linear_model import Ridge
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler

cancer = load_breast_cancer()
X = cancer.data
y = cancer.target
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
reg = Ridge(alpha=1.0)
model = reg.fit(X_scaled, y)

In [7]:
from sklearn.linear_model import RidgeCV
reg = RidgeCV(alphas=[0.1, 1.0, 10.0])
model = reg.fit(X_scaled, y)
model.coef_

array([-4.46961783e-02, -1.50583980e-02, -6.85047461e-02,  1.07646375e-01,
        4.70773033e-03,  1.78082387e-01, -1.11962199e-01, -8.21221289e-02,
        3.94653861e-03,  1.19882828e-02, -1.84734297e-01,  8.75701411e-03,
        3.83181892e-02,  9.79780439e-02, -4.52612147e-02,  1.78629860e-04,
        1.02124574e-01, -4.90608799e-02, -1.04261083e-02,  1.83627607e-02,
       -3.85897007e-01, -5.36779506e-02, -4.80368207e-02,  2.68017729e-01,
       -2.13883862e-02,  9.35267946e-03, -7.71488110e-02, -5.05950745e-02,
       -4.44449308e-02, -8.80861354e-02])

In [10]:
model.alphas

[0.1, 1.0, 10.0]

In [11]:
#13.5 reducing Features with Lasso
from sklearn.linear_model import Lasso
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target
scaler = StandardScaler()

X_scaled = scaler.fit_transform(X)
reg = Lasso(alpha=0.1)
model = reg.fit(X_scaled, y)
model.coef_

array([-0.        , -0.        , -0.        , -0.        , -0.        ,
       -0.        , -0.        , -0.01720381, -0.        ,  0.        ,
       -0.        , -0.        , -0.        , -0.        , -0.        ,
       -0.        , -0.        , -0.        , -0.        , -0.        ,
       -0.1296136 , -0.01093637, -0.        , -0.        , -0.        ,
       -0.        , -0.        , -0.16202985, -0.        , -0.        ])

In [13]:
reg_a10 = Lasso(alpha=10.0)
model_a10 = reg_a10.fit(X_scaled, y)
model_a10.coef_

array([-0., -0., -0., -0., -0., -0., -0., -0., -0.,  0., -0.,  0., -0.,
       -0.,  0., -0., -0., -0.,  0., -0., -0., -0., -0., -0., -0., -0.,
       -0., -0., -0., -0.])