## Part 1: Linear Regression

![](https://mlfromscratch.com/content/images/2020/01/linearRegression2-3.png)

In [None]:
import torch as th
from torch import nn
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
import numpy as np
from time import time

In [None]:
# Seed both NumPy and PyTorch so that repeated runs of the notebook
# produce the same random numbers (reproducibility).
th.manual_seed(42)
np.random.seed(42)

# Sample counts used to benchmark the linear regressions:
# 10**3, 10**4, ..., 10**8.
no_inputs = [10 ** exponent for exponent in range(3, 9)]

In [None]:
class NumpyLinearRegression:
    """Ordinary least-squares linear regression solved in closed form with NumPy.

    The weights are the solution of the normal equations,
    ``W = (X' * X)^-1 * X' * y``, computed after a column of ones has been
    prepended to ``X`` so that the first weight is the free term (intercept).
    """

    def __init__(self):
        # Fitted coefficients, intercept first; stays None until fit() runs.
        self.weights = None

    @staticmethod
    def _add_bias_column(X):
        """Return ``X`` with a leading column of ones for the free term.

        ``np.column_stack`` turns 1-D inputs into columns, so both a flat
        vector of samples and an ``(n_samples, n_features)`` matrix work.
        """
        X = np.asarray(X)
        return np.column_stack((np.ones(X.shape[0]), X))

    def fit(self, X, y):
        """Estimate the weights from the training data.

        Args:
            X: array-like of shape (n_samples,) or (n_samples, n_features).
            y: array-like of shape (n_samples,) or (n_samples, n_targets).

        Raises:
            numpy.linalg.LinAlgError: if ``X' * X`` is singular.
        """
        design = self._add_bias_column(X)
        gram = design.T @ design
        moment = design.T @ np.asarray(y)
        # Solving gram @ W = moment yields (X' * X)^-1 * X' * y without
        # forming the explicit inverse.
        self.weights = np.linalg.solve(gram, moment)

    def predict(self, X):
        """Return the predictions ``X * W`` for the given samples.

        Args:
            X: array-like of shape (n_samples,) or (n_samples, n_features).

        Returns:
            numpy.ndarray with one prediction per sample; its trailing shape
            follows the shape of the ``y`` passed to fit().

        Raises:
            RuntimeError: if called before fit().
        """
        if self.weights is None:
            raise RuntimeError("fit() must be called before predict()")
        return self._add_bias_column(X) @ self.weights


In [None]:
# Draw a small, noisy, one-feature dataset to sanity-check the NumPy model.
# NOTE(review): n_informative (7) is larger than n_features (1) -- confirm
# this is intended.
X, y = make_regression(
    n_samples=100,
    n_features=1,
    n_informative=7,
    bias=1,
    noise=100,
)

# Build the NumPy linear regression and estimate its weights from the samples.
np_lin_reg = NumpyLinearRegression()
np_lin_reg.fit(X, y)

# Show the raw samples together with the model's predictions on them.
plt.scatter(X, y)
plt.plot(X, np_lin_reg.predict(X), color='orange')

In [None]:
# Benchmark: time NumpyLinearRegression.fit once for every sample count.
results_numpy = []

for no_samples in no_inputs:
    X, y = make_regression(
        n_samples=no_samples,
        n_features=10,
        n_informative=7,
        bias=1,
        noise=100,
    )
    reg = NumpyLinearRegression()

    # Only the fit call sits inside the timed window; generating the data
    # and constructing the model are excluded.
    start = time()
    reg.fit(X, y)
    end = time()

    elapsed = end - start
    results_numpy.append(elapsed)

In [None]:
class TorchLinearRegression(nn.Module):
    """Ordinary least-squares linear regression solved in closed form with PyTorch.

    The weights are the solution of the normal equations,
    ``W = (X' * X)^-1 * X' * y``, computed after a column of ones has been
    prepended to ``X`` so that the first weight is the free term (intercept).
    All intermediate tensors are created on ``device`` so the same code can be
    benchmarked on the CPU and on the GPU.
    """

    def __init__(self, device="cpu"):
        # nn.Module keeps internal registries that only exist after its own
        # constructor has run, so it must be called before anything else.
        super().__init__()
        # Fitted coefficients, intercept first; stays None until fit() runs.
        self.weights = None
        # Device on which the inputs and all intermediate tensors are placed.
        self.device = device

    def _design_matrix(self, X):
        """Return ``X`` on ``self.device`` with a leading column of ones."""
        X = th.as_tensor(X, device=self.device)
        if not X.is_floating_point():
            # The linear solve needs a floating-point dtype.
            X = X.to(th.get_default_dtype())
        if X.ndim == 1:
            # Treat a flat vector as n_samples rows of a single feature.
            X = X.unsqueeze(1)
        ones = th.ones((X.shape[0], 1), dtype=X.dtype, device=X.device)
        return th.cat((ones, X), dim=1)

    def fit(self, X, y):
        """Estimate the weights from the training data.

        Args:
            X: tensor (or array-like) of shape (n_samples,) or
                (n_samples, n_features).
            y: tensor (or array-like) of shape (n_samples,) or
                (n_samples, n_targets).

        Raises:
            RuntimeError: raised by ``torch.linalg.solve`` when ``X' * X`` is
                not invertible.
        """
        design = self._design_matrix(X)
        # Targets must share dtype and device with the design matrix.
        y = th.as_tensor(y, dtype=design.dtype, device=design.device)
        gram = design.T @ design
        moment = design.T @ y
        # Solving gram @ W = moment yields (X' * X)^-1 * X' * y without
        # forming the explicit inverse.
        self.weights = th.linalg.solve(gram, moment)

    def predict(self, X):
        """Return the predictions ``X * W`` for the given samples.

        Args:
            X: tensor (or array-like) of shape (n_samples,) or
                (n_samples, n_features).

        Returns:
            Tensor on ``self.device`` with one prediction per sample; its
            trailing shape follows the shape of the ``y`` passed to fit().

        Raises:
            RuntimeError: if called before fit().
        """
        if self.weights is None:
            raise RuntimeError("fit() must be called before predict()")
        design = self._design_matrix(X)
        return design @ self.weights.to(design.dtype)

In [None]:
# Draw a small, noisy, one-feature dataset to sanity-check the PyTorch model.
X, y = make_regression(
    n_samples=100,
    n_features=1,
    n_informative=7,
    bias=1,
    noise=100,
)

# Wrap the NumPy arrays as tensors; y is first reshaped into a single column.
X = th.from_numpy(X)
y = th.from_numpy(y.reshape((-1, 1)))

# Build the torch linear regression (default device) and fit it.
lin_reg = TorchLinearRegression()
lin_reg.fit(X, y)

# Show the raw samples together with the model's predictions on them.
plt.scatter(X, y)
plt.plot(X.numpy(), lin_reg.predict(X), color='orange')

In [None]:
# Benchmark: time TorchLinearRegression.fit once per sample count, first on
# the CPU and then (when one is available) on the GPU.

results_torch = {
    "cpu": [],
    "cuda": []
}

for device in ["cpu", "cuda"]:
    on_gpu = device == "cuda"
    if on_gpu and not th.cuda.is_available():
        # No GPU: fill in zeros so the series still has one entry per size.
        results_torch["cuda"] = [0] * len(no_inputs)
        continue

    for no_samples in no_inputs:
        X, y = make_regression(n_samples=no_samples, n_features=10, n_informative=7, bias=1, noise=100)
        y = y.reshape((-1, 1))
        reg = TorchLinearRegression(device=device)
        X, y = th.from_numpy(X).to(device), th.from_numpy(y).to(device)

        # CUDA work is queued asynchronously, so the device is synchronized
        # before starting and before stopping the clock; otherwise the
        # measurement may exclude pending transfers or unfinished kernels.
        if on_gpu:
            th.cuda.synchronize()
        start = time()
        reg.fit(X, y)
        if on_gpu:
            th.cuda.synchronize()
        end = time()
        results_torch[device].append(end - start)


In [None]:
# Draw one fit-time curve per backend against the number of samples.
for label, timings in (
    ("torch-cuda", results_torch["cuda"]),
    ("torch-cpu", results_torch["cpu"]),
    ("numpy", results_numpy),
):
    plt.plot(no_inputs, timings, label=label)
plt.legend()