In [19]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [20]:
def gradient_descent(X, y, m, b, lr, num_iterations):
    for _ in range(num_iterations):
        y_pred = m * X + b
        d_m = -2 * np.dot(X, (y - y_pred))
        d_b = -2 * np.dot(np.ones(len(X)), (y - y_pred)) 
        m -= lr * d_m / len(X)
        b -= lr * d_b / len(X)
    return m, b

def mse(y_pred,y):
    MSE = np.mean((y - y_pred) ** 2)
    return MSE

def r_squared(y_pred, y):
    ssr = np.sum((y - y_pred) ** 2)
    sst = np.sum((y - np.mean(y)) ** 2)
    return 1 - (ssr / sst)

In [21]:
concrete_data = pd.read_excel("data/Concrete_Data.xls")
concrete_data.reset_index(drop=True, inplace=True)

In [22]:
#split dataset
X = concrete_data.drop(columns=['Concrete compressive strength(MPa, megapascals) '])
y = concrete_data["Concrete compressive strength(MPa, megapascals) "]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [41]:
results = {
    "column": [],
    "new m": [],
    "new b": [],
    "MSE": [],
    "R-squared": []
}

In [35]:
for column in X_train:
  X = X_train[column]
  X_normalized = (X - X.min()) / (X.max() - X.min())
  m_new, b_new = gradient_descent(X_normalized,y_train, m=1, b=1, lr=0.1, num_iterations=1000)
  y_pred = m_new * X_normalized + b_new
  MSE= mse(y_pred,y_train)
  r_2 = r_squared(y_pred,y_train)
  
  results["column"].append(column)
  results["new m"].append(m_new)
  results["new b"].append(b_new)
  results["MSE"].append(MSE)
  results["R-squared"].append(r_2)



In [38]:
result = pd.DataFrame(results)
result

Unnamed: 0,column,new m,new b,MSE,R-squared
0,Cement (component 1)(kg in a m^3 mixture),34.117377,21.731078,213.9998,0.246699
1,Blast Furnace Slag (component 2)(kg in a m^3 m...,10.812098,33.620355,277.24435,0.024072
2,Fly Ash (component 3)(kg in a m^3 mixture),-7.694835,37.955435,277.712366,0.022424
3,Water (component 4)(kg in a m^3 mixture),-27.801849,49.181797,261.081065,0.080968
4,Superplasticizer (component 5)(kg in a m^3 mix...,32.118134,29.711354,248.629605,0.124799
5,Coarse Aggregate (component 6)(kg in a m^3 mi...,-12.644262,42.199851,275.717145,0.029448
6,Fine Aggregate (component 7)(kg in a m^3 mixture),-13.122283,41.720674,276.952458,0.025099
7,Age (day),33.586863,31.829421,252.755058,0.110277


In [42]:
for column in X_train:
  X = X_train[column]
  m_new, b_new = gradient_descent(X_normalized,y_train, m=1, b=1, lr=0.1, num_iterations=1000)
  y_pred = m_new * X + b_new
  MSE= mse(y_pred,y_train)
  r_2 = r_squared(y_pred,y_train)
  
  results["column"].append(column)
  results["new m"].append(m_new)
  results["new b"].append(b_new)
  results["MSE"].append(MSE)
  results["R-squared"].append(r_2)

In [43]:
result = pd.DataFrame(results)
result

Unnamed: 0,column,new m,new b,MSE,R-squared
0,Cement (component 1)(kg in a m^3 mixture),33.586863,31.829421,103468400.0,-364218.1
1,Blast Furnace Slag (component 2)(kg in a m^3 m...,33.586863,31.829421,14728040.0,-51843.18
2,Fly Ash (component 3)(kg in a m^3 mixture),33.586863,31.829421,7799670.0,-27454.63
3,Water (component 4)(kg in a m^3 mixture),33.586863,31.829421,37753160.0,-132893.9
4,Superplasticizer (component 5)(kg in a m^3 mix...,33.586863,31.829421,79201.68,-277.7979
5,Coarse Aggregate (component 6)(kg in a m^3 mi...,33.586863,31.829421,1075917000.0,-3787335.0
6,Fine Aggregate (component 7)(kg in a m^3 mixture),33.586863,31.829421,679695700.0,-2392597.0
7,Age (day),33.586863,31.829421,6336323.0,-22303.5
