In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, SGDRegressor
from sklearn.pipeline import Pipeline,make_pipeline
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.svm import SVR
from sklearn.compose import ColumnTransformer
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error,accuracy_score
# Decision Trees & Random Forest
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import set_config
# Gradient Boosting (XGBoost, LightGBM)
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor


In [None]:
# Location of the CSV inside the Kaggle input directory.
DATA_PATH = '/kaggle/input/home-value-insights/house_price_regression_dataset.csv'

# Load the dataset and show summary statistics of its numeric columns.
df = pd.read_csv(DATA_PATH)
df.describe()

In [None]:
# Every column except the last is a feature; the last column is used as the target y.
x, y = df.iloc[:, :-1], df.iloc[:, -1]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=3,
)
x_train

In [None]:
# Scale factor applied to the 'Square_Footage' column before plotting.
# NOTE(review): presumably a square-feet -> square-metres conversion — confirm the column's unit.
AREA_SCALE = 0.0929030

# Scatter of the scaled area against the target column.
scaled_area = df['Square_Footage'] * AREA_SCALE
plt.scatter(scaled_area, y)
plt.xlabel("area of plot ")
plt.ylabel("price")
plt.show()

In [None]:
# Number of missing values in each column.
# (`.isna().count()` would count rows, not NaNs, and a bare `df.dropna()` returns a
# new frame without modifying `df`, so neither is useful here.)
missing_per_column = df.isna().sum()

# Rows are deliberately NOT dropped in this cell: x / y and the train/test split were
# already derived from `df` earlier in the notebook, so changing `df` here would leave
# them out of sync. If anything is missing, clean the data before the split instead.
if missing_per_column.sum() > 0:
    print("Warning: missing values found - drop or impute them before the train/test split.")

missing_per_column

MAKING A PREPROCESSING PIPELINE

In [None]:
def evaluate_models(models, x_tr, x_te, y_tr, y_te):
    """Fit each model on the training data and print its test-set metrics.

    Parameters
    ----------
    models : iterable of scikit-learn regressors
        Each estimator is (re)fitted in place on ``x_tr`` / ``y_tr``.
    x_tr, x_te : array-like
        Pre-processed training and test feature matrices.
    y_tr, y_te : array-like
        Training and test targets.

    Returns
    -------
    list of float
        Test-set R² score of each model, in the order the models were given.
    """
    r2_scores = []
    for model in models:
        model_name = type(model).__name__
        model.fit(x_tr, y_tr)

        y_pred = model.predict(x_te)
        mae = mean_absolute_error(y_te, y_pred)
        mse = mean_squared_error(y_te, y_pred)
        r2 = r2_score(y_te, y_pred)

        r2_scores.append(r2)
        print(f"{model_name} - MAE: {mae:.4f}, MSE: {mse:.4f}, R² Score: {r2:.4f}")
    return r2_scores


# pipe1: standardise, then keep 3 principal components.
# pipe2: standardise only.
steps1=[('standardscaler',StandardScaler()),('pca',PCA(n_components=3))]
steps2=[('standardscaler',StandardScaler())]
pipe1=Pipeline(steps1)
pipe2=Pipeline(steps2)

# Fit the preprocessing on the training split only, then apply it to the test split,
# so no information from the test rows leaks into the scaler / PCA.
x1_train = pipe1.fit_transform(x_train)
x2_train = pipe2.fit_transform(x_train)
x1_test = pipe1.transform(x_test)
x2_test = pipe2.transform(x_test)

print(f"x1_train shape: {x1_train.shape}, y_train shape: {y_train.shape}")

# SGDRegressor is stochastic; seeding it makes the reported scores reproducible.
models = [LinearRegression(), Ridge(), Lasso(), ElasticNet(), SGDRegressor(random_state=3)]

# NOTE: despite their names, accuracy1 / accuracy2 hold R² scores (one per model).
# The names are kept because later cells read them.

# With PCA-transformed data
print("Results with PCA preprocessing:")
accuracy1 = evaluate_models(models, x1_train, x1_test, y_train, y_test)

# Without PCA (standardised features only); the same estimators are refitted.
print("Results with only StandardScaler preprocessing:")
accuracy2 = evaluate_models(models, x2_train, x2_test, y_train, y_test)



WORKS GREAT WITHOUT PCA (DIMENSIONALITY REDUCTION)

In [None]:
# Tick labels for the five regressors, in the order they were evaluated.
# NOTE: this rebinds `models` from estimator objects to plain strings; the name is
# kept as-is so any later cell that plots with it keeps working.
models=['LinearRegression()', 'Ridge()', 'Lasso()', 'ElasticNet()', 'SGDRegressor()']


# Bar chart of the scores obtained on the PCA-transformed features.
# `accuracy1` holds R² scores (not classification accuracy), so label it as such.
plt.figure(figsize=(10, 6))
plt.bar(models, accuracy1, color='skyblue')

# Add title and labels
plt.title('Model R² Comparison (StandardScaler + PCA)', fontsize=14)
plt.xlabel('Model', fontsize=12)
plt.ylabel('R² score (test set)', fontsize=12)

# Rotate the model names so they do not overlap, then display the chart
plt.xticks(rotation=45)
plt.show()

In [None]:
# Tick labels are defined locally so this cell does not depend on `models` having
# been rebound to a list of strings by another cell.
model_labels = ['LinearRegression()', 'Ridge()', 'Lasso()', 'ElasticNet()', 'SGDRegressor()']

# Bar chart of the scores obtained on the standardised (no PCA) features.
# `accuracy2` holds R² scores (not classification accuracy), so label it as such.
plt.figure(figsize=(10, 6))
plt.bar(model_labels, accuracy2, color='skyblue')

# Add title and labels
plt.title('Model R² Comparison (StandardScaler only)', fontsize=14)
plt.xlabel('Model', fontsize=12)
plt.ylabel('R² score (test set)', fontsize=12)

# Rotate the model names so they do not overlap, then display the chart
plt.xticks(rotation=45)
plt.show()

WITHOUT PIPELINE

In [None]:
# Importing necessary libraries
# (These names are already imported earlier in the notebook; re-importing is harmless.)
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, SGDRegressor
from sklearn.preprocessing import StandardScaler

# Use only the first feature column for training and testing.
# NOTE(review): the scatter below plots df['Square_Footage'] on the same axis, so this
# assumes the first column of x is 'Square_Footage' — confirm.
X_train_col = x_train.iloc[:, [0]]  # first column of the training features (kept 2-D)
X_test_col = x_test.iloc[:, [0]]    # first column of the test features (kept 2-D)

# Standardise the feature; the scaler is fitted on the training split only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_col)
X_test_scaled = scaler.transform(X_test_col)

# Regression models to compare.
# SGDRegressor is stochastic; seeding it makes the plotted line reproducible.
models = [
    LinearRegression(),
    Ridge(),
    Lasso(),
    ElasticNet(),
    SGDRegressor(max_iter=1000, tol=1e-3, random_state=3)
]

# Names and styles for plotting
model_names = ['LinearRegression', 'Ridge', 'Lasso', 'ElasticNet', 'SGDRegressor']
colors = ['r', 'g', 'b', 'm', 'c']  # Different colors for different models
linestyles = ['-', '--', '-.', ':', (0, (5, 10))]  # Different line styles

# Plot the test points in increasing x order. With unsorted x the line is traced
# back and forth over itself, which makes dashed/dotted styles look solid.
x_test_values = X_test_col.to_numpy().ravel()
plot_order = x_test_values.argsort(kind="stable")
x_plot = x_test_values[plot_order]

# Start plotting
plt.figure(figsize=(10, 6))

# Scatter plot of all original data points in light red
plt.scatter(df['Square_Footage'], y, color='#FF9999', label='Original Data', alpha=0.6)

# Fit each model and draw its predictions on the test points
for model, name, color, style in zip(models, model_names, colors, linestyles):
    model.fit(X_train_scaled, y_train)  # Train the model
    y_pred = model.predict(X_test_scaled)  # Predict on the (unsorted) test rows

    # Reorder the predictions to match the sorted x values before drawing the line
    plt.plot(x_plot, y_pred[plot_order], label=name, color=color, linestyle=style)

# Adding plot title and axis labels
plt.title("Regression Models on First Column (Different Line Styles)")
plt.xlabel("Square Footage (First Column of X)")
plt.ylabel("Predicted Price (y)")

# Displaying the legend and grid for better readability
plt.legend()
plt.grid(True)

# Show the final plot
plt.show()
