# Reading the Dataset

In [None]:
# Importing required libraries
import pandas as pd

In [None]:
# Reading the dataset
# '/content/Profit.csv' only exists inside a Colab session where the file has
# been uploaded. When it is missing, fall back to the AIM Mock datasets copy so
# the notebook still runs top to bottom from a fresh kernel elsewhere.
import os

LOCAL_CSV = '/content/Profit.csv'
REMOTE_CSV = 'https://raw.githubusercontent.com/analyticsindiamagazine/MocksDatasets/main/Profit.csv'
data = pd.read_csv(LOCAL_CSV if os.path.exists(LOCAL_CSV) else REMOTE_CSV)

In [None]:
# Alternatively, this dataset can be read from AIM Mock datasets
# using the lines of code below.
dataset_url = 'https://raw.githubusercontent.com/analyticsindiamagazine/MocksDatasets/main/Profit.csv'
data = pd.read_csv(dataset_url)


In [None]:
# Preview the first five rows to sanity-check the columns and values
data.head(n=5)

Unnamed: 0,Marketing Spend,Profit
0,471784.1,192261.83
1,443898.53,191792.06
2,407934.54,191050.39
3,383199.62,182901.99
4,366168.42,166187.94


In [None]:
# Report the dataset dimensions as a (rows, columns) tuple
dataset_shape = data.shape
print('Shape of the dataset (No. of rows, No. of columns):', dataset_shape)

Shape of the dataset (No. of rows, No. of columns): (200, 2)


# Defining the input-output features

In [None]:
# Split the frame column-wise into NumPy arrays:
#   X - every column except the last (kept 2-D, as estimators expect)
#   y - the last column only (1-D target)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

In [None]:
# Confirm the dimensions of the input and output arrays
for caption, array in (
    ('Shape of the input features:', X),
    ('Shape of the output features:', y),
):
    print(caption, array.shape)

Shape of the input features: (200, 1)
Shape of the output features: (200,)


# Defining the training and test sets

In [None]:
# Hold out 10% of the rows as a test set; the fixed seed makes the split
# reproducible across runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Confirm the dimensions of each piece of the train/test split
for caption, split in (
    ('Shape of the training input data:', X_train),
    ('Shape of the training output data:', y_train),
    ('Shape of the test input data:', X_test),
    ('Shape of the test output data:', y_test),
):
    print(caption, split.shape)

Shape of the training input data: (180, 1)
Shape of the training output data: (180,)
Shape of the test input data: (20, 1)
Shape of the test output data: (20,)


# Defining and training a K-NN regression model

## Initializing a K-NN Regression model

In [None]:
# Defining a KNN Regression model
# Instantiated with the library's default settings; this untuned estimator is
# the one handed to the grid search over n_neighbors later in the notebook.
from sklearn.neighbors import KNeighborsRegressor
regressor = KNeighborsRegressor()

## Hyperparameter tuning

In [None]:
# Search for the best number of neighbours (K) with cross-validation
from sklearn.model_selection import GridSearchCV

# Candidate values of K: 1 through 20 inclusive
k_range = [k for k in range(1, 21)]
param_grid = {'n_neighbors': k_range}

# 10-fold cross-validation on the training split, scoring each K by R-squared
grid = GridSearchCV(
    estimator=regressor,
    param_grid=param_grid,
    scoring='r2',
    cv=10,
    verbose=1,
    return_train_score=False,
)
grid.fit(X_train, y_train)

Fitting 10 folds for each of 20 candidates, totalling 200 fits


GridSearchCV(cv=10, estimator=KNeighborsRegressor(),
             param_grid={'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                         13, 14, 15, 16, 17, 18, 19, 20]},
             scoring='r2', verbose=1)

## Defining and training the K-NN Regression model

In [None]:
# Defining the KNN regressor with optimal value of K
# Take K from the fitted grid search instead of hard-coding it, so the model
# stays in sync with the search if the data, split or candidate range changes.
best_k = grid.best_params_['n_neighbors']
regressor = KNeighborsRegressor(n_neighbors=best_k)
regressor.fit(X_train, y_train)

KNeighborsRegressor(n_neighbors=2)

# Predicting and evaluating the predictions

In [None]:
# Making predictions on the test data
# Uses the regressor fitted on the training split; X_test is the held-out
# portion produced by train_test_split.
y_pred = regressor.predict(X_test)

In [None]:
# Show predicted and actual profits side by side, one row per test sample
comparison = {'Predicted Profit': y_pred, 'Actual Profit': y_test}
pd.DataFrame(comparison)

Unnamed: 0,Predicted Profit,Actual Profit
0,64921.08,64926.08
1,128429.985,129917.04
2,99957.59,99937.59
3,152211.77,152161.77
4,103257.38,103322.38
5,128429.985,129957.04
6,79225.135,122776.86
7,118474.03,118424.03
8,108552.04,108502.04
9,64921.08,64926.08


In [None]:
# Mean Squared Error (MSE) between the actual and predicted test targets
from sklearn.metrics import mean_squared_error

MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)
print('Mean Squared Error is:', MSE)

Mean Squared Error is: 95169375.35473497


In [None]:
# Root Mean Squared Error (RMSE)
# Square root of the MSE computed in the previous cell, which brings the error
# back to the same units as the target values.
import math
RMSE = math.sqrt(MSE)
print('Root Mean Squared Error is:', RMSE)

Root Mean Squared Error is: 9755.479247824525


In [None]:
# R-Squared (coefficient of determination) on the test split
from sklearn.metrics import r2_score

r2 = r2_score(y_true=y_test, y_pred=y_pred)
print('R-Squared is:', r2)

R-Squared is: 0.8858429403678624
