In [34]:
import numpy as np

class MyLinearRegression:
    """Linear regression fitted with batch gradient descent on mean squared error.

    The model computes ``features @ weights + bias``. ``weights`` stays ``None``
    until :meth:`train` has been called (or the attribute is set by hand).
    """

    def __init__(self, random_state=None):
        """Create an untrained model.

        Args:
            random_state: Optional seed for the initial weights. When ``None``
                (the default) the weights are drawn from NumPy's global random
                state, so ``np.random.seed`` still controls them.
        """
        # One coefficient per feature column; None means "not trained yet".
        self.weights = None
        # Scalar intercept added to every prediction.
        self.bias = 0.0
        # Seed used only when the weights are first initialised.
        self.random_state = random_state

    @staticmethod
    def _as_target_vector(true_values):
        """Return the targets as a flat float vector.

        Flattening matters: a column vector of shape (n, 1) would otherwise
        broadcast against the (n,) predictions into an (n, n) matrix.
        """
        return np.asarray(true_values, dtype=float).reshape(-1)

    def _initial_weights(self, num_features):
        """Draw small random starting weights (standard normal scaled by 0.01)."""
        if self.random_state is None:
            return np.random.randn(num_features) * 0.01
        rng = np.random.default_rng(self.random_state)
        return rng.standard_normal(num_features) * 0.01

    def predict(self, features):
        """Return ``features @ weights + bias``.

        Args:
            features: A single sample of shape (num_features,) or a batch of
                shape (n, num_features).

        Returns:
            A scalar for a single sample, or an array of shape (n,) for a batch.

        Raises:
            ValueError: If the model has no weights yet.
        """
        if self.weights is None:
            raise ValueError("Model has not been trained yet. Call train() first.")
        return np.dot(features, self.weights) + self.bias

    def evaluate(self, features, true_values):
        """Return the mean squared error of the predictions for ``features``.

        Args:
            features: Input accepted by :meth:`predict`.
            true_values: Targets, one per prediction. Any shape holding the
                right number of values is accepted, e.g. (n,) or (n, 1).

        Raises:
            ValueError: If the model is untrained, or the number of targets
                differs from the number of predictions.
        """
        predictions = np.asarray(self.predict(features), dtype=float).reshape(-1)
        targets = self._as_target_vector(true_values)
        if predictions.size != targets.size:
            raise ValueError(
                f"Got {predictions.size} predictions but {targets.size} true values."
            )
        return np.mean((predictions - targets) ** 2)

    def train(self, features, true_values, learning_rate, epochs):
        """Fit weights and bias with full-batch gradient descent on the MSE.

        Calling ``train`` again continues from the current weights and bias.

        Args:
            features: 2-D array-like of shape (n, num_features).
            true_values: n targets, given as shape (n,) or (n, 1).
            learning_rate: Step size applied to both gradients.
            epochs: Number of full passes over the data.

        Raises:
            ValueError: If ``features`` is not 2-D, is empty, does not match the
                number of targets, or does not match the existing weights.
        """
        features = np.asarray(features, dtype=float)
        targets = self._as_target_vector(true_values)

        if features.ndim != 2:
            raise ValueError(
                "features must be a 2-D array of shape (n_samples, n_features); "
                f"got {features.ndim} dimension(s)."
            )
        n, num_features = features.shape
        if n == 0:
            raise ValueError("Cannot train on an empty dataset.")
        if targets.size != n:
            raise ValueError(
                f"features has {n} rows but true_values has {targets.size} values."
            )

        if self.weights is None:
            self.weights = self._initial_weights(num_features)
        elif np.shape(self.weights) != (num_features,):
            raise ValueError(
                f"Model has {np.shape(self.weights)} weights but features has "
                f"{num_features} columns."
            )

        for _ in range(epochs):
            error = self.predict(features) - targets

            # Gradients of mean((Xw + b - y)^2) with respect to w and b.
            gradient_w = (2 / n) * np.dot(features.T, error)
            gradient_b = (2 / n) * np.sum(error)

            self.weights -= learning_rate * gradient_w
            self.bias -= learning_rate * gradient_b


In [37]:
import pandas as pd
from sklearn.linear_model import LinearRegression


# Seed NumPy's global generator so the random initial weights drawn inside
# MyLinearRegression.train are the same on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Load the dataset: two numeric inputs (Mileage, Year) and the Price target.
data = pd.read_csv("car_price_dataset.csv")
features, true_values = data[["Mileage", "Year"]].values, data["Price"].values
features = np.asarray(features, dtype=float)

# Per-column statistics used to standardise every input, old or new.
means = np.mean(features, axis=0)
stds = np.std(features, axis=0)
# A constant column has zero spread; dividing by it would turn the feature
# into NaN/inf, so such columns are left unscaled (divided by 1) instead.
stds = np.where(stds == 0, 1.0, stds)


def standardize(points):
    """Scale raw [Mileage, Year] values with the dataset's means and stds.

    Accepts a single point of shape (2,) or a batch of shape (n, 2).
    """
    return (np.asarray(points, dtype=float) - means) / stds


features_scaled = standardize(features)

# Fit the hand-written gradient-descent model and report its training MSE.
my_linear_regression = MyLinearRegression()
my_linear_regression.train(features_scaled, true_values, learning_rate=0.01, epochs=1000)

mse = my_linear_regression.evaluate(features_scaled, true_values)
print("MSE:", mse)

# Reference fit on the same scaled data, used to cross-check predictions below.
model = LinearRegression()
model.fit(features_scaled, true_values)

# Spot-check a few rows of the dataset against their known prices.
SAMPLE_ROWS = (1, 0, 16)
existing_predictions = []
for row in SAMPLE_ROWS:
    existing_data_point = features[row]
    existing_data_point_scaled = standardize(existing_data_point)
    prediction = my_linear_regression.predict(existing_data_point_scaled)
    existing_predictions.append(prediction)
    print("Predicted price:", prediction, "Actual:", true_values[row])
prediction1, prediction2, prediction3 = existing_predictions

# A single unseen point: both models should agree closely.
new_data_point = np.array([60000, 2015])
new_data_point_scaled = standardize(new_data_point)
prediction4 = my_linear_regression.predict(new_data_point_scaled)
print("Predicted price:", prediction4, "Scikit-Learn:", model.predict(new_data_point_scaled.reshape(1, -1)))

# A batch of unseen points, predicted in one call.
new_data_points = np.array([[60000, 2015], [70000, 2018]])
new_data_points_scaled = standardize(new_data_points)
prediction5 = my_linear_regression.predict(new_data_points_scaled)
print("Predicted prices:", prediction5, "Scikit-Learn:", model.predict(new_data_points_scaled))

MSE: 2502589.034663308
Predicted price: 11840.536815602598 Actual: 12092
Predicted price: 8589.515459013495 Actual: 8501
Predicted price: 12683.90478943929 Actual: 14837
Predicted price: 11653.403047612568 Scikit-Learn: [11653.40306698]
Predicted prices: [11653.40304761 12350.97877629] Scikit-Learn: [11653.40306698 12350.97879679]
