In [8]:
import statsmodels.api as sm  # For OLS Regression
# Import necessary libraries
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

# Toy dataset: years of experience (feature) and the matching salary (target).
X = np.array([[1], [2], [3], [4], [5]])  # written directly as a (5, 1) column
Y = np.array([30, 35, 40, 45, 50])

# Step 3: expand X into polynomial terms up to `degree`.
# With degree 2 each row becomes [1, x, x^2], as the printout below shows.
degree = 2  # raise this to allow a more flexible curve
poly = PolynomialFeatures(degree=degree)
X_poly = poly.fit(X).transform(X)

# Show the expanded design matrix, one row per sample.
print("Polynomial Features:", X_poly, sep="\n")


Polynomial Features:
[[ 1.  1.  1.]
 [ 1.  2.  4.]
 [ 1.  3.  9.]
 [ 1.  4. 16.]
 [ 1.  5. 25.]]


In [11]:
# Step 4: Fit an ordinary linear regression on the polynomial-expanded features.
# NOTE(review): X_poly already carries a leading column of ones, and
# LinearRegression fits its own intercept by default, so that column looks
# redundant here — confirm whether include_bias=False is wanted upstream.
model = LinearRegression().fit(X_poly, Y)
model  # keep the fitted estimator as the cell's displayed value

In [9]:
# 6. Ordinary Least Squares (OLS) via statsmodels: fits the same design matrix
# and prints a detailed statistical report of the regression.
# add_constant is called with its defaults. X_poly already contains a column
# of ones, and the printed summary shows Df Model = 2, i.e. the fit uses three
# parameters (constant, x, x^2) rather than four.
X_ols = sm.add_constant(X_poly)
ols_model = sm.OLS(endog=Y, exog=X_ols).fit()
print("\nOLS Regression Summary:\n")
print(ols_model.summary())


OLS Regression Summary:

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 3.634e+28
Date:                Sat, 01 Mar 2025   Prob (F-statistic):           2.75e-29
Time:                        12:17:59   Log-Likelihood:                 147.53
No. Observations:                   5   AIC:                            -289.1
Df Residuals:                       2   BIC:                            -290.2
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         25.0000   1.

  warn("omni_normtest is not valid with less than 8 observations; %i "


In [15]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
 
# Small illustrative housing dataset: one list entry per house.
data = {
    'Size': [1500, 1600, 1700, 1800, 1900, 2000, 2100, 2200],
    'Bedrooms': [3, 3, 3, 4, 4, 4, 5, 5],
    'Age': [10, 15, 10, 20, 30, 15, 20, 25],
    'Price': [300000, 320000, 340000, 360000, 400000, 370000, 420000, 430000]
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Independent variables (features) and dependent variable (target).
# Edit this single list to experiment with fewer predictors, e.g.
# ['Size', 'Bedrooms'] or ['Size']; the prediction at the bottom of the cell
# selects the same columns, so it keeps working without further changes.
feature_cols = ['Size', 'Bedrooms', 'Age']
X = df[feature_cols]
y = df['Price']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Coefficients and intercept
print("Intercept:", model.intercept_)
print("Coefficients:", model.coef_)

# Evaluate the model.
# NOTE: with 8 rows and test_size=0.2 only 2 rows are held out, so these
# metrics are very noisy and should be read as a demonstration only.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)

# Predict the price of an unseen house.
# The model was fitted on a DataFrame, so the new sample is passed as a
# DataFrame with the same column names and order. A bare NumPy array makes
# scikit-learn warn about missing feature names and silently relies on
# positional order.
new_house_specs = {'Size': 1500, 'Bedrooms': 3, 'Age': 10}
new_house = pd.DataFrame([new_house_specs])[feature_cols]
predicted_price = model.predict(new_house)
print(f"Predicted Price for new house: {predicted_price[0]}")

Intercept: 33576.01713061816
Coefficients: [  182.01284797 -6252.67665953  1391.86295503]
Mean Squared Error: 299314729.30776143
R-squared: 0.5210964331075817
Predicted Price for new house: 301755.888650963


