In [None]:
import numpy as np
import pandas as pd
import sklearn
import sklearn.linear_model
import sklearn.preprocessing

In [None]:
# Fit two linear models (mango yield and orange yield) on three weather
# columns with hand-written batch gradient descent, then report a
# "100 * (1 - mean absolute error / mean target)" score for each.
url = "https://drive.google.com/uc?export=download&id=1qgMi-w6IPC3Lg3l815vrQK0bZdz2ozTd"
fruits = pd.read_csv(url)

# Pull every column used below out of the frame as a NumPy array.
temp, rain, humid = (
    np.array(fruits[column])
    for column in ("Temperature_F", "Rainfall_mm", "Humidity_percent")
)
mangoes, oranges = (
    np.array(fruits[column]) for column in ("Mangoes_ton", "Oranges_ton")
)
n = len(temp)

# Design matrix: a column of ones (intercept) followed by the weather columns.
input_matrix = np.column_stack([np.ones(n), temp, rain, humid])

# Standardise the weather columns in place. Both statistics are taken from the
# raw values before anything is overwritten; the intercept column is untouched.
weather = input_matrix[:, 1:]  # a view, so the writes below land in input_matrix
weather_mean = weather.mean(axis=0)
weather_std = weather.std(axis=0)
weather -= weather_mean
weather /= weather_std

mangoes_coefficients = np.zeros(4)
oranges_coefficients = np.zeros(4)

dx = 1e-2          # step size applied to the gradient
tol = 1e-6         # stop once both gradient norms are below this
max_iter = 100000  # upper bound on the number of update steps

for _ in range(max_iter):
    mangoes_residual = mangoes - input_matrix @ mangoes_coefficients
    oranges_residual = oranges - input_matrix @ oranges_coefficients

    # Gradient of the summed squared residuals with respect to the coefficients.
    mangoes_gradient = -2 * input_matrix.T @ mangoes_residual
    oranges_gradient = -2 * input_matrix.T @ oranges_residual

    # The two fits share one stopping rule: keep updating both until both are flat.
    both_flat = all(
        np.linalg.norm(g) < tol for g in (mangoes_gradient, oranges_gradient)
    )
    if both_flat:
        break

    mangoes_coefficients -= dx * mangoes_gradient
    oranges_coefficients -= dx * oranges_gradient

# Mean absolute error of each fit, expressed relative to the mean of its target.
mangoes_mean_abs_error = np.sum(np.abs(mangoes - input_matrix @ mangoes_coefficients)) / n
oranges_mean_abs_error = np.sum(np.abs(oranges - input_matrix @ oranges_coefficients)) / n

mangoes_accuracy = (1 - mangoes_mean_abs_error / mangoes.mean(axis=0)) * 100
oranges_accuracy = (1 - oranges_mean_abs_error / oranges.mean(axis=0)) * 100

print("Accuracy for mangoes = ", mangoes_accuracy, "%", sep="")
print("Accuracy for oranges = ", oranges_accuracy, "%", sep="")




Accuracy for mangoes = 98.79636549886423%
Accuracy for oranges = 99.18454724798694%


In [None]:
# Fetch the rental listings and pull out the raw columns needed for the
# feature engineering further down.
url = "https://drive.google.com/uc?export=download&id=1PuMH9P2rGqaq9iagTe_u46N2M1bHu0rW"
dataset = pd.read_csv(url)

bhk, size, floor_text = (
    np.array(dataset[column]) for column in ("BHK", "Size", "Floor")
)

# One list of whitespace-separated words per "Floor" entry.
floor_words = [entry.split() for entry in floor_text]

def convToFloat(s):
    """Translate one word of a "Floor" entry into a numeric level.

    The named levels map to fixed integers ("Lower" -> -1, "Upper" -> 0,
    "Ground" -> 1). Any other word is parsed with ``float`` and shifted up by
    one, so "0" becomes 1.0 and "3" becomes 4.0. A word that is neither a named
    level nor numeric makes ``float`` raise ``ValueError``.
    """
    named_levels = {"Ground": 1, "Upper": 0, "Lower": -1}
    if s in named_levels:
        return named_levels[s]
    return float(s) + 1

def isnotCarpet(s):
    """Return 0.0 when ``s`` is exactly "Carpet Area", and 1.0 for anything else."""
    return 0.0 if s == "Carpet Area" else 1.0

def isFurnished(s):
    """Score a furnishing status.

    "Furnished" scores 1.0, "Semi-Furnished" scores 0.5, and every other value
    scores 0.0. Matching is exact and case-sensitive.
    """
    if s == "Furnished":
        return 1.0
    return 0.5 if s == "Semi-Furnished" else 0.0

# --- Feature engineering -----------------------------------------------------
# `floor` and `height` come from the first and the last word of each "Floor"
# entry (presumably "<floor> out of <total floors>" -- confirm against the data).
floor = np.array([convToFloat(words[0]) for words in floor_words])
height = np.array([convToFloat(words[-1]) for words in floor_words])
floor_ratio = floor / height
carpet = np.array([isnotCarpet(area_type) for area_type in dataset["Area Type"]])
furnished = np.array([isFurnished(status) for status in dataset["Furnishing Status"]])
bathroom = np.array(dataset["Bathroom"])
cities = np.array(dataset["City"])
rent = np.array(dataset["Rent"])

n = len(size)

# --- City encoding -------------------------------------------------------------
# Every listing's city is replaced by the mean rent of all listings in that city.
# The per-city totals and counts are derived from the data itself, so a city
# that is not in a hand-written list no longer raises KeyError.
# NOTE: the encoding is computed from the target (`rent`) on the same rows the
# model is fitted and scored on, so the reported error is an in-sample figure.
rent_by_city = dataset.groupby("City")["Rent"]
city_dict = rent_by_city.sum().to_dict()     # city -> total rent
city_count = rent_by_city.size().to_dict()   # city -> number of listings
city_encoded = np.array(
    [city_dict[city] / city_count[city] for city in cities], dtype=float
)

# --- Design matrix ---------------------------------------------------------------
# Standardise the nine feature columns, then prepend the intercept column.
X = np.column_stack(
    (bhk, size, floor, height, floor_ratio, carpet, furnished, bathroom, city_encoded)
)
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
# A constant column has zero spread; dividing by it would fill the column (and
# then every coefficient) with NaN. Dividing by 1 leaves such a column at 0.
X_std[X_std == 0] = 1.0
X_scaled = (X - X_mean) / X_std
input_matrix = np.column_stack((np.ones(n), X_scaled))

# --- Batch gradient descent on the summed squared residuals ---------------------
coefficients = np.zeros(input_matrix.shape[1])

dx = 1e-6        # step size applied to the gradient
max_iter = 1000  # upper bound on the number of update steps
tol = 1e-3       # stop once the gradient norm is below this

converged = False
for _ in range(max_iter):
    gradient = -2 * input_matrix.T @ (rent - input_matrix @ coefficients)
    if np.linalg.norm(gradient) < tol:
        converged = True
        break
    coefficients -= dx * gradient

# --- Evaluation --------------------------------------------------------------------
residuals = rent - input_matrix @ coefficients
rms_error = np.sqrt(np.sum(residuals ** 2) / n)

if not converged:
    # Recompute the gradient at the final coefficients so the reported norm is
    # not one step stale (and is defined even when max_iter is 0).
    final_gradient_norm = np.linalg.norm(-2 * input_matrix.T @ residuals)
    print(
        "Gradient descent stopped after max_iter =", max_iter,
        "steps without reaching tol =", tol,
        "(final gradient norm:", str(final_gradient_norm) + ")",
    )

print("RMS error for rent =", rms_error)


In [None]:
# Fit a degree-2 polynomial least-squares model of y against x and print the
# signed in-sample residuals (prediction minus observation).
url = "https://drive.google.com/uc?export=download&id=1dQe6kal5qx-seuSifLdpu7mmQ8P7Z5OL"
func = pd.read_csv(url)

x, y = (np.array(func[column]) for column in ("x", "y"))

# scikit-learn expects a 2-D feature array, so x is turned into a single column
# before being expanded. include_bias=False omits the constant column from the
# expansion.
x_column = x.reshape(-1, 1)
poly = sklearn.preprocessing.PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(x_column)

model = sklearn.linear_model.LinearRegression()
model.fit(X_poly, y)

y_pred = model.predict(X_poly)
residuals = y_pred - y

print(residuals)

[-1.083577   -2.01189418 -1.91272632 ...  1.6287143   1.44798316
  1.96587317]
