### EDA

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the salary dataset from the CSV file in the working directory
# and preview the first rows.
df = pd.read_csv("Salary_Data.csv")
df.head()

In [None]:
# Feature: years of work experience; target: salary in INR (both as NumPy arrays).
x = df['Work Experience (Years)'].to_numpy()
y = df['Salary (INR)'].to_numpy()

In [None]:
# Scatter plot of the raw data: salary against years of work experience.
plt.scatter(x, y, marker='x', c='r')
# Title and axis labels describe the plotted columns
# ('Work Experience (Years)' on x, 'Salary (INR)' on y).
plt.title("Salary Trends")
plt.ylabel('Salary (in INR)')
plt.xlabel('Work Experience (Years)')
plt.show()

### Try to Fit using Linear Regression

In [None]:
def predict_y(w, x, b):
    """Return the linear model output ``np.dot(w, x) + b``.

    ``w`` and ``x`` may be scalars or arrays whose shapes are compatible with
    ``np.dot``; ``b`` is added to the product by broadcasting.
    """
    weighted_input = np.dot(w, x)
    return weighted_input + b

def calculate_cost(y, y_pred):
    """Return the halved mean squared error between ``y_pred`` and ``y``.

    Computes ``(1 / (2 * m)) * sum((y_pred - y) ** 2)`` where ``m = len(y)``.
    """
    residuals = np.subtract(y_pred, y)
    squared_total = np.sum(np.square(residuals))
    n_samples = len(y)
    scale = 1 / (2 * n_samples)
    return scale * squared_total

def Calculate_gradient(x, y, y_pred):
    """Return the cost gradients ``(dj_dw, dj_db)`` for the current predictions.

    ``dj_dw`` is ``(1/m) * np.dot(y_pred - y, x.T)`` and ``dj_db`` is
    ``(1/m) * sum(y_pred - y)``, with ``m = len(y)``. ``x`` must expose ``.T``
    (e.g. a NumPy array).
    """
    residuals = np.subtract(y_pred, y)
    inv_m = 1 / len(y)
    grad_w = inv_m * np.dot(residuals, x.T)
    grad_b = inv_m * np.sum(residuals)
    return grad_w, grad_b

def optimize_weights(w, x, b, y, iter, alpha):
    """Run ``iter`` gradient-descent steps and return the per-step history.

    Each pass applies the gradient computed on the previous pass (zero on the
    first pass, so index 0 holds the starting ``w`` and ``b``), evaluates the
    cost, prints a progress line, and stores ``w``, ``b`` and the cost.

    Returns ``(w_all, b_all, cost_all)``, three arrays of length ``iter``.
    """
    grad_w = grad_b = 0
    w_all, b_all, cost_all = (np.zeros(iter) for _ in range(3))

    for i in range(iter):
        # Step using the gradient from the previous pass.
        w, b = w - alpha * grad_w, b - alpha * grad_b
        predictions = predict_y(w, x, b)
        cost = calculate_cost(y, predictions)
        print(f"For Iteration{i} w = {w}, b = {b} and cost = {cost}")
        # Gradient at the current parameters, consumed by the next pass.
        grad_w, grad_b = Calculate_gradient(x, y, predictions)
        w_all[i], b_all[i], cost_all[i] = w, b, cost

    return w_all, b_all, cost_all

In [None]:
# Initial guesses for the weight and bias of the linear model.
w = 1000
b = 20000
# Feature (years of experience) and target (salary) as NumPy arrays.
x = df['Work Experience (Years)'].to_numpy()
y = df['Salary (INR)'].to_numpy()
m = len(x)  # number of samples (not used again in this cell)
# NOTE: assigning to `iter` shadows Python's built-in iter() in this namespace.
iter = 100000
alpha = 0.001  # step size applied to the gradients in optimize_weights
# Run gradient descent; optimize_weights prints one line per iteration.
w_all, b_all, cost_all = optimize_weights(w,x,b,y,iter,alpha)

In [None]:
# Pick the iteration whose recorded cost is lowest and read back its parameters.
min_index = np.argmin(cost_all)  # position of the smallest cost
min_value = np.min(cost_all)     # the smallest cost itself
cost, w, b = cost_all[min_index], w_all[min_index], b_all[min_index]
print(f"Optimize value of w = {w}, b = {b}, with Cost = {cost} at iteration = {min_index}")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Plot the recorded cost values over all iterations
plt.figure(figsize=(8, 6))

# cost_all holds one cost value per gradient-descent iteration
plt.plot(cost_all, color='red')
plt.title('Cost Function Value over Iterations')
plt.xlabel('Iterations')
plt.ylabel('Cost')

# Adjust the layout and show the plot
plt.tight_layout()
plt.show()

In [None]:
# Zoom in on the tail of the cost curve: iterations 54000 onward.
# The x values are offset by 54000 so the axis shows the true iteration number.
plt.plot(54000 + np.arange(len(cost_all[54000:])), cost_all[54000:],color='red')
plt.title("Cost vs. iteration") 
plt.ylabel('Cost')              
plt.xlabel('iteration step')   
plt.show()

In [None]:
# NOTE(review): w and b are hard-coded here and overwrite whatever values are
# currently held in the kernel — presumably pasted from an earlier training
# run; confirm they match the latest optimisation result.
w = 11999.999985947386
b = -1666.666474989303
y_pred = predict_y(w,x,b)

# Plot the predicted and the actual data points
plt.scatter(x, y_pred, marker='x', c='blue',label='Predicted Data')
plt.scatter(x, y, marker='x', c='r',label='Actual Data')

# Two end points for drawing the fitted line
x_values = np.array([0, 20])  # x = 0 and x = 20
y_values = w * x_values + b  # Calculate y for both points

# Plotting the line
plt.plot(x_values, y_values, label=f'y = {w}x + {b}', color='blue')

# Set the title
plt.title("Salary Trends")
# Set the y-axis label
plt.ylabel('Salary (in INR)')
# Set the x-axis label
plt.xlabel('Work Experience (Years)')

# Show the grid and the legend
plt.grid(True)
plt.legend()
plt.show()

### Try to fit using Non Linear Regression

In [None]:
# Constants: candidate starting values for the two weights and the bias.
# NOTE(review): w and b are reassigned in a later cell before training,
# so these values look unused — confirm before relying on them.
w1 = 500
w2 = 2000
b = 3000
w = np.array([[500,2000]])  # the two weights as a single-row 2-D array

In [None]:
# Build the two polynomial features from years of experience and print them.
x = df['Work Experience (Years)'].to_numpy()
x2 = np.array([x])   # linear term, wrapped in an extra leading dimension
x1 = np.square(x)    # squared term
x1 = np.array([x1])  # wrapped in an extra leading dimension like x2
print(f"x2 = {x2}")
print(f"x1 = {x1}")

In [None]:
import numpy as np

# Model prediction
def predict_y(w, x, b):
    """Evaluate the model: dot product of ``w`` with ``x``, plus the bias ``b``."""
    return np.dot(w, x) + b

# Cost of the model (halved mean squared error)
def calculate_cost(y, y_pred):
    """Return ``(1 / (2 * m)) * sum((y_pred - y) ** 2)`` with ``m = len(y)``."""
    n_samples = len(y)
    squared_errors = np.square(np.subtract(y_pred, y))
    half_mean_factor = 1 / (2 * n_samples)
    return half_mean_factor * np.sum(squared_errors)

# Gradient of the cost with respect to the weights and the bias
def calculate_gradient(x, y, y_pred):
    """Return ``(dj_dw, dj_db)`` for the current predictions.

    ``dj_dw = (1/m) * np.dot(y_pred - y, x.T)`` and
    ``dj_db = (1/m) * sum(y_pred - y)``, where ``m = len(y)``.
    """
    inv_m = 1 / len(y)
    error = np.subtract(y_pred, y)
    weight_grad = inv_m * np.dot(error, x.T)
    bias_grad = inv_m * np.sum(error)
    return weight_grad, bias_grad

# Optimize weights
def optimize_weights(w,x,b,y,iter,alpha):
    """Run batch gradient descent for a two-weight model and record its history.

    Parameters
    ----------
    w : initial weights (two values).
    x : feature array passed to ``predict_y`` / ``calculate_gradient``;
        expected to hold one row per feature so ``np.dot(w, x)`` yields one
        prediction per sample.
    b : initial bias.
    y : target values.
    iter : number of gradient-descent iterations to run.
    alpha : step size multiplied into each gradient.

    Returns
    -------
    (w_all, b_all, cost_all) : the weights (shape ``(iter, 2)``), bias and
        cost recorded at every iteration. Index 0 holds the starting
        parameters, because the first pass applies a zero gradient.
    """
    dj_dw = np.zeros(2)
    dj_db = 0
    w_all = np.zeros((iter,2))
    b_all = np.zeros(iter)
    # The cost history must be a local array sized to this run. It used to be
    # allocated as `j_all` while the loop wrote to (and the function returned)
    # `cost_all`, i.e. whatever global of that name existed in the kernel.
    cost_all = np.zeros(iter)

    for i in range(0,iter):
        # Apply the gradient computed on the previous pass.
        w = w - (alpha * dj_dw)
        b = b - (alpha * dj_db)
        y_pred = predict_y(w,x,b)
        j = calculate_cost(y,y_pred)
        # Gradient at the current parameters, used on the next pass.
        dj_dw, dj_db = calculate_gradient(x,y,y_pred)
        print(f"Iteration {i}: w = {w}, b = {b:.4f}, Cost = {j:.4f}")
        w_all[i,] = w
        b_all[i] = b
        cost_all[i] = j

    return w_all,b_all,cost_all

In [None]:
# Starting weights for the squared and the linear term respectively.
w = np.array([0.5, 0.01])
x = df['Work Experience (Years)'].to_numpy()
x2 = x             # linear term
x1 = np.square(x)  # squared term
# Stack the features as rows: x[0] is the squared term, x[1] the linear term.
# NOTE: from here on `x` is this 2-row feature array, not the raw column.
x = np.array([x1,x2])
b = 0.2
y = df['Salary (INR)'].to_numpy()
alpha = 0.001  # step size for gradient descent
# NOTE: assigning to `iter` shadows Python's built-in iter() in this namespace.
iter = 1000

In [None]:
# Train on the unscaled polynomial features; returns the per-iteration history.
w_all,b_all,cost_all = optimize_weights(w,x,b,y,iter,alpha)

In [None]:
# Find the lowest recorded cost and the parameters stored at that iteration.
min_index = np.argmin(cost_all)  # index of the minimum cost
min_value = np.min(cost_all)     # value of the minimum cost
cost, w, b = cost_all[min_index], w_all[min_index], b_all[min_index]
print(f"Optimize value of w = {w}, b = {b}, with Cost = {cost} at iteration = {min_index}")

### Try to fit using Non Linear Regression (Using Feature Scaling)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Reload the salary dataset so this section starts from the raw data,
# then preview the first rows.
df = pd.read_csv("Salary_Data.csv")
df.head()

In [None]:
# Raw feature column and the two polynomial terms derived from it.
x = df['Work Experience (Years)'].to_numpy()
x2 = np.array(x)   # linear term (a copy of x)
x1 = np.square(x)  # squared term
x1 = np.array(x1)

In [None]:
# Constants: candidate starting values for the two weights and the bias.
# NOTE(review): w and b are reassigned in a later cell before training,
# so these values look unused — confirm before relying on them.
w1 = 500
w2 = 2000
b = 30000
w = np.array([[500,2000]])  # the two weights as a single-row 2-D array

In [None]:
# Standardize each feature: subtract its mean and divide by its standard deviation.
mu_x1, sd_x1 = np.mean(x1), np.std(x1)
mu_x2, sd_x2 = np.mean(x2), np.std(x2)
z1 = (x1 - mu_x1) / sd_x1
z2 = (x2 - mu_x2) / sd_x2
# Stack as rows: z[0] is the scaled squared term, z[1] the scaled linear term.
z = np.array([z1, z2])

In [None]:
# Model prediction on the scaled features
def predict_y(w, z, b):
    """Return ``np.dot(w, z) + b`` for weights ``w``, features ``z`` and bias ``b``."""
    projection = np.dot(w, z)
    return projection + b

In [None]:
import numpy as np

# Model prediction
def predict_y(w, z, b):
    """Evaluate the model on features ``z``: ``np.dot(w, z)`` plus the bias ``b``."""
    return np.dot(w, z) + b

# Cost of the model (halved mean squared error)
def calculate_cost(y, y_pred):
    """Return ``(1 / (2 * m)) * sum((y_pred - y) ** 2)`` with ``m = len(y)``."""
    count = len(y)
    errors = np.subtract(y_pred, y)
    total_squared_error = np.sum(np.square(errors))
    return (1 / (2 * count)) * total_squared_error

# Gradient of the cost with respect to the weights and the bias
def calculate_gradient(z, y, y_pred):
    """Return ``(dj_dw, dj_db)`` for the current predictions.

    ``dj_dw = (1/m) * np.dot(y_pred - y, z.T)`` and
    ``dj_db = (1/m) * sum(y_pred - y)``, where ``m = len(y)``.
    """
    inv_m = 1 / len(y)
    error = np.subtract(y_pred, y)
    return inv_m * np.dot(error, z.T), inv_m * np.sum(error)

def optimize_weights(w, z, b, y, iter, alpha):
    """Run ``iter`` gradient-descent steps on features ``z`` and record the history.

    Each pass applies the gradient from the previous pass (zero on the first
    pass, so index 0 holds the starting parameters), evaluates the cost,
    prints a progress line and stores the weights, bias and cost.

    Returns ``(w_all, b_all, cost_all)`` with shapes ``(iter, 2)``,
    ``(iter,)`` and ``(iter,)``.
    """
    grad_w, grad_b = np.zeros(2), 0
    w_all = np.zeros((iter, 2))
    b_all, cost_all = np.zeros(iter), np.zeros(iter)

    for i in range(iter):
        # Step using the gradient from the previous pass.
        w, b = w - (alpha * grad_w), b - (alpha * grad_b)
        predictions = predict_y(w, z, b)
        j = calculate_cost(y, predictions)
        # Gradient at the current parameters, consumed by the next pass.
        grad_w, grad_b = calculate_gradient(z, y, predictions)
        print(f"Iteration {i}: w = {w}, b = {b:.4f}, Cost = {j:.4f}")
        w_all[i,] = w
        b_all[i], cost_all[i] = b, j

    return w_all, b_all, cost_all

In [None]:
# Starting weights for the scaled squared term and the scaled linear term.
w = np.array([ 0.5, 0.01])
# Stack the z-scored features as rows (z[0] from x**2, z[1] from x).
z = np.array([z1,z2])
b = 0.1
y = df['Salary (INR)'].to_numpy()
alpha = 0.001  # step size for gradient descent
# NOTE: assigning to `iter` shadows Python's built-in iter() in this namespace.
iter = 10000

In [None]:
# Train on the z-scored features; returns the per-iteration history.
w_all,b_all,cost_all = optimize_weights(w,z,b,y,iter,alpha)

In [None]:
# Sanity check: number of recorded cost values.
len(cost_all)

In [None]:
# Find the lowest recorded cost and the parameters stored at that iteration.
min_index = np.argmin(cost_all)  # index of the minimum cost
min_value = np.min(cost_all)     # value of the minimum cost
cost, w, b = cost_all[min_index], w_all[min_index], b_all[min_index]
print(f"Optimize value of w = {w}, b = {b}, with Cost = {cost} at iteration {min_index}")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Plot the recorded cost values over all iterations
plt.figure(figsize=(8, 6))

# cost_all holds one cost value per gradient-descent iteration
plt.plot(cost_all, color='red')
plt.title('Cost Function Value over Iterations')
plt.xlabel('Iterations')
plt.ylabel('Cost')

# Adjust the layout and show the plot
plt.tight_layout()
plt.show()

In [None]:
# Zoom in on the tail of the cost curve: iterations 9900 onward.
# The x values are offset by 9900 so the axis shows the true iteration number.
plt.plot(9900 + np.arange(len(cost_all[9900:])), cost_all[9900:],color='red')
plt.title("Cost vs. iteration") 
plt.ylabel('Cost')              
plt.xlabel('iteration step')   
plt.show()

In [None]:
# Display the current contents of y for inspection.
y

In [None]:
# Display the current contents of x for inspection.
x

In [None]:
# Predict Y
# NOTE(review): these parameters are hard-coded — presumably pasted from a
# previous training run on the z-scored features; confirm they are current.
w = np.array([62719.39271241, 12110.60141646])
w1 = w[0]
w2 = w[1]
b = 118333.33333332607

# The model in this section is trained on the standardized features
# z1 (from x**2) and z2 (from x), so the weights must be applied to those same
# scaled inputs. Applying them to the raw x and x**2 puts the predictions on
# the wrong scale. Vectorizing also removes the hard-coded length of 21.
y_pred = w1 * z1 + w2 * z2 + b

print(len(y_pred))
print(y_pred)

In [None]:
# Check the shape of the prediction array.
np.shape(y_pred)

In [None]:
# Print the number of target values and the targets themselves,
# for comparison with the predictions.
print(len(y))
print(y)

In [None]:
# Two x positions (0 and 20); displayed as the cell output.
x_values = np.array([0, 20]) 
x_values

In [None]:
# Compare the actual targets (y) with the model predictions (y_pred) against x
plt.figure(figsize=(10, 6))

# Scatter plot for actual values
plt.scatter(x, y, color='blue', label='Actual Values')

# Scatter plot for predicted values
plt.scatter(x, y_pred, color='red', label='Predicted Values', marker='x')

# Connect the predicted values with a line, in the order the points appear in x
plt.plot(x, y_pred, color='green', label='Y_pred Curve')

# Adding labels and title
plt.xlabel('X')
plt.ylabel('Y / Y_pred')
plt.title('Actual vs Predicted Values')
plt.legend()
plt.grid(True)

# Display the plot
plt.show()


In [None]:
# Cost of the model (halved mean squared error)
def calculate_cost(y, y_pred):
    """Return ``(1 / (2 * m)) * sum((y - y_pred) ** 2)`` with ``m = len(y)``."""
    n_samples = len(y)
    squared_errors = np.square(np.subtract(y, y_pred))
    weight = 1 / (2 * n_samples)
    return weight * np.sum(squared_errors)

# Cost of the predictions computed above, measured against the actual targets.
cost = calculate_cost(y,y_pred)
print(f" Cost = {cost}")