In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
import math

# --- Data ---------------------------------------------------------------
# Item weights in grams, reshaped to a single-feature column (n_samples, 1)
# because scikit-learn estimators expect a 2-D X.
weights = np.array([10, 24, 40, 52, 60, 77, 82, 100, 120, 130, 145, 155, 170, 180, 200]).reshape(-1, 1)
# Calories for each item. Note the last value (503) is far above the rest,
# which pulls the mean well above the median and inflates the fit error.
calories = np.array([148, 154, 167, 176, 176, 191, 203, 207, 227, 230, 245, 251, 259, 290, 503])

# --- Fit ----------------------------------------------------------------
# SGDRegressor shuffles the training data between epochs, so without a fixed
# seed the fitted coefficients (and every number derived from them below)
# differ on each run. Pin the seed so Restart & Run All is reproducible.
RANDOM_STATE = 42

# model = LinearRegression()
# Standardise the feature first: plain SGD converges poorly on unscaled inputs.
scaler = StandardScaler()
model = make_pipeline(
    scaler,
    SGDRegressor(max_iter=1000, tol=1e-3, random_state=RANDOM_STATE),
)
model.fit(weights, calories)
predicted_calories = model.predict(weights)

# --- Summary statistics ---------------------------------------------------
mean_calories = np.mean(calories)
median_calories = np.median(calories)

# Prediction for a single 20 g item; predict() needs a 2-D input and returns
# a 1-element array, hence the [[20]] and the trailing [0].
weight_20 = np.array([[20]])
predicted_calories_20 = model.predict(weight_20)[0]

# In-sample error of the fit (evaluated on the training points themselves).
mse = mean_squared_error(calories, predicted_calories)
rmse = math.sqrt(mse)

# --- Plot -----------------------------------------------------------------
plt.figure(figsize=(10, 6))
plt.scatter(weights, calories, label="Data Points")
plt.plot(weights, predicted_calories, color="orange", label="Regression Line")
plt.axhline(mean_calories, color="blue", linestyle="--", label=f"Mean = {mean_calories:.2f}")
plt.axhline(median_calories, color="green", linestyle="--", label=f"Median = {median_calories:.2f}")
# zorder lifts the marker above the regression line and residual segments.
plt.scatter(weight_20, predicted_calories_20, color="purple", marker="x", label=f"Prediction for 20g = {predicted_calories_20:.2f}", zorder=5)

# Residuals: one vertical segment per point from the observed value to the
# fitted value. A single vlines call draws them all and yields exactly one
# legend entry, so no per-point loop or first-iteration label is needed.
plt.vlines(weights.ravel(), calories, predicted_calories, colors="red", linewidth=1, label="Error")

plt.title(f"Weight vs. Calories with Prediction and Errors\nRMSE = {rmse:.2f}, MSE = {mse:.2f}")

plt.xlabel("Weight (g)")
plt.ylabel("Calories")
plt.grid(which="both", linestyle="--", alpha=0.7)
plt.xticks(range(0, 201, 50))
plt.yticks(range(100, 601, 100))
plt.legend()
plt.show()


In [None]:
# Show the two central-tendency values side by side as (mean, median).
(mean_calories, median_calories)

In [None]:
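# Extract intercept and slope from the linear regression model.
# NOTE: this only applies when `model` is the bare LinearRegression (the
# commented-out option in the first cell). A Pipeline exposes no
# `intercept_` / `coef_`, so for the SGD pipeline they are read from its
# steps in the next cell instead.
# intercept = model.intercept_
# slope = model.coef_[0]

# intercept, slope
# (apparently the output recorded with that model:)
# (106.22818471764359, 1.1867813781458552)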

In [None]:
# Pull the two fitted steps out of the pipeline; make_pipeline names each step
# after its lowercased class name.
scaler_X = model.named_steps['standardscaler']
regressor = model.named_steps['sgdregressor']

# Single-feature problem, so every fitted array has exactly one entry.
scaler_X_mean, scaler_X_scale = scaler_X.mean_[0], scaler_X.scale_[0]
slope_scaled, intercept_scaled = regressor.coef_[0], regressor.intercept_[0]

# The regressor was fitted on z = (x - mean) / scale, i.e.
#   y = slope_scaled * (x - mean) / scale + intercept_scaled
# Expanding in terms of the raw feature x gives
#   slope     = slope_scaled / scale
#   intercept = intercept_scaled - slope * mean
slope_original = slope_scaled / scaler_X_scale
intercept_original = intercept_scaled - slope_original * scaler_X_mean

(intercept_original, slope_original)

In [None]:
# Coefficients as learned by the regressor in standardised feature space,
# shown as (intercept, slope).
(intercept_scaled, slope_scaled)



In [None]:
# Manual sanity check of the 20 g prediction: intercept + 20 * slope, using the
# coefficients recovered from the fitted pipeline so the value tracks the
# current model. (This was previously hard-coded as 106.2 + 20 * 1.187, which
# matches the rounded values recorded in the commented-out intercept/slope
# cell rather than the SGD fit.)
intercept_original + 20 * slope_original

In [None]:
# The pipeline's own prediction for a 20 g item (computed in the first cell),
# displayed for comparison with the manual intercept + 20 * slope calculation.
predicted_calories_20