In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv("/content/diamond_processed.csv")

In [3]:
# for Linear Regression
from sklearn.linear_model import LinearRegression

def linear_model(x_train, y_train):
  print("Linear Regression")
  linear_regression = LinearRegression()
  linear_regression.fit(x_train, y_train)
  return linear_regression

In [4]:
# for Lasso Regression
from sklearn.linear_model import Lasso

def lasso_model(x_train, y_train):
  print("Lasso Regression")
  lasso_regression = Lasso(alpha=0.8, max_iter=10000)
  lasso_regression.fit(x_train, y_train)
  return lasso_regression

In [5]:
# for Ridge Regression
from sklearn.linear_model import Ridge

def ridge_model(x_train, y_train):
  print("Ridge Regression")
  ridge_regression = Ridge(alpha=0.9)
  ridge_regression.fit(x_train, y_train)
  return ridge_regression

In [6]:
# for SGD Regression
from sklearn.linear_model import SGDRegressor

def sgd_model(x_train, y_train):
  print("SGD Regression")
  sgd_regression = SGDRegressor(max_iter=2000)
  sgd_regression.fit(x_train, y_train)
  return sgd_regression

In [7]:
# Build and Train Model
def build_and_train_model(data, target_name, reg_fn):
  X = data.drop(target_name, axis=1)
  Y = data[target_name]

  x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

  model = reg_fn(x_train, y_train)

  print("Training Score : ", model.score(x_train, y_train))

  y_pred = model.predict(x_test)
  print("Testing Score : ", r2_score(y_test, y_pred))

  return {"model" : model, "x_train" : x_train, "y_train" : y_train, "x_test" : x_test, "y_test" : y_test, "y_pred" : y_pred}

In [8]:
linear_reg = build_and_train_model(data, "price", linear_model)

Linear Regression
Training Score :  0.9140454762243037
Testing Score :  0.9087349181011484


In [9]:
lasso_reg = build_and_train_model(data, "price", lasso_model)

Lasso Regression
Training Score :  0.9139391162445358
Testing Score :  0.9087147606693013


In [10]:
ridge_reg = build_and_train_model(data, "price", ridge_model)

Ridge Regression
Training Score :  0.9140387035715241
Testing Score :  0.9087540665820765


In [11]:
sgd_reg = build_and_train_model(data, "price", sgd_model)

SGD Regression
Training Score :  -1.1296012347715155e+24
Testing Score :  -1.0938919772762764e+24
