Multiple linear regressions
---

In [None]:
import pandas as pd

# Load the marketing-campaign data from a CSV file (path is relative to the
# notebook's working directory)
data_df = pd.read_csv('marketing-campaign.csv')
# Report (n_rows, n_columns) so the dataset size is visible up front
print('data_df shape:', data_df.shape)
# Last expression of the cell: rich display of the first rows
data_df.head()

In [None]:
# Input matrix X: every column except the 'sales' target, as a NumPy array
X = data_df.drop(columns='sales').values
print('X:', X.shape)

# Target vector y: the 'sales' column alone, as a NumPy array
y = data_df['sales'].values
print('y:', y.shape)

In [None]:
from scipy.linalg import lstsq

# Fit a multiple linear regression by least squares.
# lstsq returns (solution, residues, rank, singular values); only the first
# two are kept. X is used as-is here, so no intercept term is estimated
# unless the data already contains a constant column.
w, rss, _, _ = lstsq(X, y)
print('w:', w)
print('RSS:', rss)

In [None]:
import numpy as np

# Prepend a constant column of ones to X: a (n,) vector of ones stacked
# column-wise in front of the (n, p) matrix gives a (n, p+1) matrix
X1 = np.column_stack((np.ones(X.shape[0]), X))

# Preview the first five rows
X1[:5]

In [None]:
# Refit on X1, the input matrix with a leading column of ones. Because that
# column comes first, w[0] is the intercept and the remaining entries are the
# feature coefficients.
# NOTE: this rebinds `w` and `rss`, replacing the values from the fit on X.
w, rss, _, _ = lstsq(X1, y)

print('w:', w)
print('RSS:', rss)

In [None]:
# Predictions are the matrix-vector product of the design matrix and weights
y_pred = X1 @ w
print('y_pred:', y_pred.shape)

In [None]:
# Verify RSS score
def RSS(y, y_pred):
    """Return the residual sum of squares between targets and predictions.

    Parameters
    ----------
    y : array-like
        Observed target values.
    y_pred : array-like
        Predicted values; combined with ``y`` under NumPy broadcasting rules.

    Returns
    -------
    NumPy scalar
        Sum over all elements of ``(y - y_pred) ** 2``.
    """
    residuals = np.subtract(y, y_pred)
    return np.square(residuals).sum()

# Recompute the RSS from the predictions so it can be compared with the value
# printed after the lstsq fit. NOTE: this rebinds `rss`, so the value returned
# by lstsq is no longer available after this line.
rss = RSS(y, y_pred)
print('RSS:', rss)