In [1]:
import pandas as pd 
import numpy as np 
import seaborn as sns

In [2]:
# Synthetic sales data set.
# A dedicated, seeded generator makes "Restart Kernel -> Run All" reproduce the
# exact same rows (and therefore the same downstream metrics) on every run.
SEED = 42
N_ROWS = 40
rng = np.random.default_rng(SEED)

data = {
    # Whole months in [1, 24]; the upper bound of `integers` is exclusive.
    'Product_Age_Months': rng.integers(1, 25, N_ROWS),  # Independent Variable 1
    'Marketing_Budget_1000s': np.round(rng.uniform(1, 10, N_ROWS), 2),  # Independent Variable 2
    # Categorical variable: it has to be encoded before it can be fed to a linear regression.
    'Seasonal_Sales_Boost': rng.choice(['Low', 'Medium', 'High'], N_ROWS),
    'Product_Price': np.round(rng.uniform(50, 150, N_ROWS), 2),  # Independent Variable 3
    'Discount_Rate': np.round(rng.uniform(0, 0.3, N_ROWS), 2),  # Independent Variable 4
    # Drawn independently of every feature above, so no real relationship exists to be learned.
    'Total_Sales': np.round(rng.uniform(500, 2500, N_ROWS), 2)  # Dependent Variable (Target)
}

# Convert to DataFrame
df = pd.DataFrame(data)
df.head()

Unnamed: 0,Product_Age_Months,Marketing_Budget_1000s,Seasonal_Sales_Boost,Product_Price,Discount_Rate,Total_Sales
0,21,2.98,Low,67.01,0.06,1402.63
1,24,5.57,High,109.98,0.07,520.01
2,13,3.16,High,130.09,0.07,1710.32
3,5,8.15,High,135.76,0.27,835.64
4,20,6.64,Low,62.24,0.08,1288.63


In [3]:
# Target is the Total_Sales column; the feature frame is every other column of df.
y = df['Total_Sales']
x = df.drop(columns=['Total_Sales'])

In [4]:
# Group the feature columns by dtype so that each group can be preprocessed separately.
cat_features = list(x.select_dtypes(include=['object']).columns)
num_features = list(x.select_dtypes(include=['number']).columns)

In [5]:
from sklearn.preprocessing import StandardScaler , OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Numeric columns: standardise with StandardScaler.
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

# Categorical columns: one-hot encode.
categorical_transformer = Pipeline(steps=[
    ('encoder', OneHotEncoder())
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, num_features),
        ('cat', categorical_transformer, cat_features)
    ],
    # Always return a dense array so it can be wrapped in a DataFrame with column
    # names, whatever the density of the one-hot block turns out to be.
    sparse_threshold=0,
    # Emit plain feature names (no 'num__' / 'cat__' prefix) instead of stripping
    # the prefixes afterwards with str.replace, which could also hit a real column name.
    verbose_feature_names_out=False,
)

# Read the raw features from df rather than from x: x is overwritten below with the
# encoded frame, which no longer contains the raw categorical column, so reading x
# here would make this cell fail when it is run a second time.
x_raw = df[num_features + cat_features]

# NOTE(review): the scaler and encoder are fit on every row, i.e. before the
# train/test split performed later in the notebook, so test-set statistics leak
# into the scaling. Fitting the preprocessor on the training split only (e.g. as
# the first step of a Pipeline that ends in the model) would avoid this.
transformed = preprocessor.fit_transform(x_raw)  # fit once, reuse the result below

# Kept for backward compatibility: same values as x, with default integer column labels.
preprocessed_data = pd.DataFrame(transformed)

x = pd.DataFrame(
    transformed,
    columns=preprocessor.get_feature_names_out(),
    index=x_raw.index,  # keep rows aligned with y
)
x.head(3)

Unnamed: 0,Product_Age_Months,Marketing_Budget_1000s,Product_Price,Discount_Rate,Seasonal_Sales_Boost_High,Seasonal_Sales_Boost_Low,Seasonal_Sales_Boost_Medium
0,1.21742,-1.225227,-1.009333,-1.139044,0.0,1.0,0.0
1,1.682676,-0.147693,0.494961,-1.010701,1.0,0.0,0.0
2,-0.023263,-1.15034,1.198972,-1.010701,1.0,0.0,0.0


In [6]:
# Split the data into train and test sets: 25% of the rows are held out for
# evaluation, and the fixed random_state keeps the split identical across runs.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error , r2_score

# Fit an ordinary least squares model on the training split (fit returns the
# estimator itself, so it can be chained) and predict the held-out rows.
model = LinearRegression().fit(x_train, y_train)
y_pred = model.predict(x_test)

# Score the held-out predictions: R2 first, then mean squared error.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"MSE: {mse:.2f}, R2 Score: {r2:.2f}")

MSE: 580181.91, R2 Score: -1.13
