In Scikit-learn
---

In [1]:
import pandas as pd

# Read the marketing campaign dataset from disk
data_df = pd.read_csv('marketing-campaign.csv')

# Target vector: the "sales" column
y = data_df['sales'].values

# Feature matrix: every column except the target
X = data_df.drop(columns='sales').values

In [2]:
from sklearn.linear_model import LinearRegression

# Build the linear regression estimator and fit it to the data in one step
# (fit() returns the fitted estimator itself)
lr = LinearRegression().fit(X, y)

# Show the learned parameters
print('Coefficients:', lr.coef_)
print('Intercept:', lr.intercept_)

Coefficients: [0.39465146 0.47037002 0.30669954]
Intercept: 0.024870917888195176


In [3]:
# For reference: scipy lstsq returns (intercept first, then the coefficients)
# w: [ 0.02487092  0.39465146  0.47037002  0.30669954]

In [4]:
# Compute predictions for the same features the model was fitted on
y_pred = lr.predict(X)
# Display the first three predictions
y_pred[:3]

array([1.24462012, 4.84934038, 4.04266482])

In [5]:
import numpy as np

# Reproduce lr.predict(X) by hand: matrix product of the features with the
# fitted coefficients, plus the intercept
y_pred = X @ lr.coef_ + lr.intercept_
y_pred[:3]

array([1.24462012, 4.84934038, 4.04266482])

In [6]:
# Compute the R2 coefficient of the fitted model on the data it was fitted on
R2 = lr.score(X, y)
print('R2:', R2)

R2: 0.9832893048848236


In [7]:
# For reference: we found 0.983289304885 in the last unit

In [11]:
from sklearn.linear_model import SGDRegressor

# Create the SGDRegressor object
# NOTE: 'squared_loss' was renamed to 'squared_error' in scikit-learn 1.0 and
# removed in 1.2; on scikit-learn < 1.0 use loss='squared_loss' instead.
lr_sgd = SGDRegressor(
    loss='squared_error', # Cost function: ordinary least squares
    penalty=None, # No penalty (regularization) term; the string 'none' is rejected by recent versions
    max_iter=1000, # Maximum number of passes over the training data (epochs)
    random_state=0, # Seed for the data shuffling, so that results are reproducible
    tol=1e-3 # Stop early once the loss no longer improves by at least tol between epochs
)

# Fit the linear regression model
lr_sgd.fit(X, y)

# Print coefficients
print('Coefficients:', lr_sgd.coef_)
print('Intercept:', lr_sgd.intercept_)

Coefficients: [0.39968853 0.44409771 0.25894341]
Intercept: [0.12807209]


In [12]:
# Compute the R2 coefficient of the SGD model on the data it was fitted on
R2_sgd = lr_sgd.score(X, y)
print('R2_sgd:', R2_sgd)

R2_sgd: 0.9821546772612869


In [13]:
from sklearn.linear_model import HuberRegressor

# Build the Huber regressor and fit it to X, y in one step
# (fit() returns the fitted estimator itself)
huber = HuberRegressor(epsilon=1.35).fit(X, y)

# Report the fitted parameters and the R^2 score on the fitted data
print('Coefficients:', huber.coef_)
print('Intercept:', huber.intercept_)
print('R^2 coefficient:', huber.score(X, y))

Coefficients: [0.39172544 0.4788203  0.29315421]
Intercept: 0.04586298819194033
R^2 coefficient: 0.983070157114285
