<a href="https://colab.research.google.com/github/yashveersinghsohi/Dive_into_Deep_Learning_Practice/blob/chapter_3/chapter_3/chapter_3_linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Linear Regression Using Sklearn

## Imports

In [14]:
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np

## Data

In [15]:
# Load the California housing dataset via scikit-learn's fetch helper; the
# returned object is indexed by key below ('data', 'target', 'feature_names', ...).
dataset = fetch_california_housing()

In [16]:
# Raw feature matrix and target vector from the loaded dataset.
X = dataset['data']
y = dataset['target']

# DataFrame views of the same arrays, labelled with the dataset's own names.
features_df = pd.DataFrame(X, columns=dataset['feature_names'])
targets_df = pd.DataFrame(y, columns=dataset['target_names'])

# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## Getting Model Parameters

In [17]:
# Ordinary least-squares baseline, fitted on the training split only.
lin_reg_sklearn = LinearRegression()
lin_reg_sklearn.fit(X=X_train, y=y_train)

In [18]:
# Fitted weights and intercept, rounded to 4 decimals for display.
np.round(lin_reg_sklearn.coef_, 4), np.round(lin_reg_sklearn.intercept_, 4)

(array([ 0.4487,  0.0097, -0.1233,  0.7831, -0.    , -0.0035, -0.4198,
        -0.4337]),
 -37.0233)

## Model Performance

**Train**

In [20]:
# Training-set error of the fitted sklearn model.
# np.round is used instead of the `.round` method so the cell works whether the
# metric comes back as a NumPy scalar or a plain Python float, and the square
# root is taken from the unrounded MSE so rounding error is not compounded.
mse = mean_squared_error(y_true=y_train, y_pred=lin_reg_sklearn.predict(X_train))
rmse = np.sqrt(mse)
mse, rmse = np.round(mse, 4), np.round(rmse, 4)
mse, rmse

(0.5179, 0.7197)

**Val**

In [19]:
# Validation-set error of the fitted sklearn model.
# np.round is used instead of the `.round` method so the cell works whether the
# metric comes back as a NumPy scalar or a plain Python float, and the square
# root is taken from the unrounded MSE so rounding error is not compounded.
mse = mean_squared_error(y_true=y_val, y_pred=lin_reg_sklearn.predict(X_val))
rmse = np.sqrt(mse)
mse, rmse = np.round(mse, 4), np.round(rmse, 4)
mse, rmse

(0.5559, 0.7456)

# Concise Neural Network Style Implementation

## Imports

In [40]:
import pandas as pd
import numpy as np
import torch.nn as nn
from torch.optim import SGD
from torch.utils.data import DataLoader, TensorDataset, Dataset

## Preparing Dataset

In [21]:
# Rebuild the same arrays and 80/20 split (same seed) used for the sklearn model,
# so the neural-network version sees the same train/validation rows.
dataset = fetch_california_housing()
X = dataset['data']
y = dataset['target']
features_df = pd.DataFrame(X, columns=dataset['feature_names'])
targets_df = pd.DataFrame(y, columns=dataset['target_names'])
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [29]:
class MyDataset(Dataset):
  """Map-style dataset pairing each feature row with its target value.

  Args:
    data: Array-like of features, indexed along its first axis.
    targets: Array-like of targets with the same length as ``data``.
    dtype: NumPy dtype both arrays are converted to. Defaults to
      ``np.float32`` so batches match the float32 parameters of ``nn.Linear``
      layers; pass ``None`` to keep the input dtype unchanged.

  Raises:
    ValueError: If ``data`` and ``targets`` have different lengths.
  """

  def __init__(self, data, targets, dtype=np.float32):
    data = np.asarray(data, dtype=dtype)
    targets = np.asarray(targets, dtype=dtype)
    # Fail early instead of raising an IndexError in the middle of an epoch.
    if len(data) != len(targets):
      raise ValueError(
        f"data and targets must have the same length, "
        f"got {len(data)} and {len(targets)}"
      )
    self.data = data
    self.targets = targets

  def __len__(self):
    """Return the number of samples."""
    return len(self.targets)

  def __getitem__(self, idx):
    """Return the ``(features, target)`` pair stored at position ``idx``."""
    X = self.data[idx]
    y = self.targets[idx]
    return X, y

In [30]:
# Wrap each split so a DataLoader can index it sample by sample.
train_dataset = MyDataset(X_train, y_train)
val_dataset = MyDataset(X_val, y_val)

In [39]:
# Mini-batches of 64 samples. Only the training loader reshuffles every epoch;
# the validation loader keeps a fixed order so evaluation passes are repeatable.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=64, shuffle=False)

## Preparing the Model

In [41]:
class LinearRegressionNN(nn.Module):
  """Linear regression as a single ``nn.LazyLinear`` layer with one output.

  The number of input features is taken from the last dimension of the first
  batch passed to ``forward``. At that moment the weight is drawn from
  N(0, 0.01^2) and the bias is set to zero.
  """

  def __init__(self):
    super().__init__()
    # Until the first forward pass the layer's weight and bias are
    # uninitialized placeholders, so writing values into them here would not
    # survive materialisation. The custom init is applied in `forward` instead.
    self.net = nn.LazyLinear(out_features=1)

  def forward(self, X):
    """Return predictions for ``X``; the output has a trailing dimension of 1."""
    if isinstance(self.net.weight, nn.parameter.UninitializedParameter):
      # First call: give the parameters their real shapes from X, then replace
      # PyTorch's default reset with the intended initialisation. Parameters
      # that are already materialised (e.g. after loading a state dict) are
      # left untouched.
      self.net.initialize_parameters(X)
      nn.init.normal_(self.net.weight, mean=0.0, std=0.01)
      nn.init.zeros_(self.net.bias)
    return self.net(X)

  def get_w_b(self):
    """Return ``(weight.data, bias.data)`` of the underlying linear layer."""
    return (self.net.weight.data, self.net.bias.data)

In [42]:
# Instantiate the model; no input size is passed because the class builds its
# layer with nn.LazyLinear.
lin_reg_nn = LinearRegressionNN()



## Loss Function and Optimizers

In [47]:
# Mean-squared-error objective, optimised with plain SGD over the model's parameters.
loss_fn = nn.MSELoss()
optimizer = SGD(lin_reg_nn.parameters(), lr=0.001)

## Training

In [None]:
class Trainer():
  """Bundles a model with the objects needed to train and validate it.

  Args:
    model: The model to train.
    optimizer: Optimizer that updates the model's parameters.
    loss_fn: Callable used as the training objective.
    train_dataloader: Iterable of training batches.
    val_dataloader: Iterable of validation batches.
  """

  def __init__(self, model, optimizer, loss_fn, train_dataloader, val_dataloader):
    # Store the constructor arguments. Reading them back from `self` before
    # they are set would raise AttributeError on construction.
    self.model = model
    self.optimizer = optimizer
    self.loss_fn = loss_fn
    self.train_dataloader = train_dataloader
    self.val_dataloader = val_dataloader

  def train_one_epoch(self):
    """Placeholder for a single pass over the training data (not implemented yet)."""
    pass

  def train_model(self):
    """Placeholder for the full multi-epoch training loop (not implemented yet)."""
    pass