In [61]:
import pandas as pd
import numpy as np

# from sklearn.datasets import load_boston, load_breast_cancer

from sklearn.linear_model import LinearRegression, Lasso, Ridge
from statsmodels.stats.outliers_influence import variance_inflation_factor

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from scipy.stats import shapiro, kstest, normaltest, skew

import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import json

### Data Gathering

In [46]:
# Read the cellphone dataset from the Excel workbook in the working directory.
df = pd.read_excel(io="Cellphone.xlsx")
# Bare last expression: rich display of the loaded frame.
df

Unnamed: 0,Product_id,Price,Sale,weight,resoloution,ppi,cpu core,cpu freq,internal mem,ram,RearCam,Front_Cam,battery,thickness
0,203,2357,10,135.0,5.20,424,8,1.350,16.0,3.000,13.00,8.0,2610,7.4
1,880,1749,10,125.0,4.00,233,2,1.300,4.0,1.000,3.15,0.0,1700,9.9
2,40,1916,10,110.0,4.70,312,4,1.200,8.0,1.500,13.00,5.0,2000,7.6
3,99,1315,11,118.5,4.00,233,2,1.300,4.0,0.512,3.15,0.0,1400,11.0
4,880,1749,11,125.0,4.00,233,2,1.300,4.0,1.000,3.15,0.0,1700,9.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156,1206,3551,4638,178.0,5.46,538,4,1.875,128.0,6.000,12.00,16.0,4080,8.4
157,1296,3211,8016,170.0,5.50,534,4,1.975,128.0,6.000,20.00,8.0,3400,7.9
158,856,3260,8809,150.0,5.50,401,8,2.200,64.0,4.000,20.00,20.0,3000,6.8
159,1296,3211,8946,170.0,5.50,534,4,1.975,128.0,6.000,20.00,8.0,3400,7.9


### EDA

In [47]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 161 entries, 0 to 160
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Product_id    161 non-null    int64  
 1   Price         161 non-null    int64  
 2   Sale          161 non-null    int64  
 3   weight        161 non-null    float64
 4   resoloution   161 non-null    float64
 5   ppi           161 non-null    int64  
 6   cpu core      161 non-null    int64  
 7   cpu freq      161 non-null    float64
 8   internal mem  161 non-null    float64
 9   ram           161 non-null    float64
 10  RearCam       161 non-null    float64
 11  Front_Cam     161 non-null    float64
 12  battery       161 non-null    int64  
 13  thickness     161 non-null    float64
dtypes: float64(8), int64(6)
memory usage: 17.7 KB


#### Train-Test Split

In [49]:
# Target is the Price column; the feature matrix is every other column
# except Product_id.
y = df["Price"]
x = df.drop(columns=["Price", "Product_id"])

# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=10
)
x_train

Unnamed: 0,Sale,weight,resoloution,ppi,cpu core,cpu freq,internal mem,ram,RearCam,Front_Cam,battery,thickness
129,499,154.0,5.5,534,4,2.700,32.000,3.000,13.0,2.1,3000,9.1
101,201,78.4,2.4,166,0,0.000,0.000,0.008,0.0,0.0,1100,12.9
60,58,175.0,5.5,401,6,1.530,32.000,3.000,21.0,8.0,3500,9.3
105,213,78.4,2.4,166,0,0.000,0.000,0.008,0.0,0.0,1100,12.9
52,43,169.0,5.7,515,4,1.875,64.000,4.000,12.0,5.0,3500,7.9
...,...,...,...,...,...,...,...,...,...,...,...,...
113,308,77.9,2.4,167,0,0.000,0.004,0.004,0.0,0.0,850,12.4
64,72,168.0,5.7,386,8,1.800,32.000,3.000,21.0,5.0,3050,7.5
15,17,141.0,5.0,294,4,1.200,8.000,1.500,8.0,1.2,2040,10.0
125,423,110.0,2.2,128,0,0.000,0.128,0.032,2.0,0.0,900,15.6


In [50]:
# Display the training target (the Price values of the training rows).
y_train

129    2466
101     833
60     2824
105     833
52     2859
       ... 
113     754
64     2685
15     1612
125     705
9      2580
Name: Price, Length: 120, dtype: int64

### Model training

In [51]:
# Fit an ordinary least-squares model on the training split.
# `fit` returns the estimator itself, so construction and fitting can be chained.
lin_reg = LinearRegression().fit(x_train, y_train)
lin_reg

LinearRegression()

### Evaluation

In [52]:
# Evaluate the fitted model on the training split.
y_pred_train = lin_reg.predict(x_train)

# Compute every metric first, then report them in a fixed order.
mse = mean_squared_error(y_train, y_pred_train)
rmse = np.sqrt(mse)  # root of the MSE
mae = mean_absolute_error(y_train, y_pred_train)
r2 = r2_score(y_train, y_pred_train)

print("MSE : ", mse)
print("RMSE : ", rmse)
print("MAE : ", mae)
print("R2 squared : ", r2)

MSE :  22868.19031168521
RMSE :  151.22232081172808
MAE :  125.3879209318258
R2 squared :  0.9624505821528214


In [53]:
# Evaluate the fitted model on the held-out test split.
y_pred_test = lin_reg.predict(x_test)

# Compute every metric first, then report them in a fixed order.
# Note: these names are the same ones used for the training metrics,
# so the training values are overwritten here.
mse = mean_squared_error(y_test, y_pred_test)
rmse = np.sqrt(mse)  # root of the MSE
mae = mean_absolute_error(y_test, y_pred_test)
r2 = r2_score(y_test, y_pred_test)

print("MSE : ", mse)
print("RMSE : ", rmse)
print("MAE : ", mae)
print("R2 squared : ", r2)

MSE :  43363.55290146279
RMSE :  208.23917235108
MAE :  162.5301647226294
R2 squared :  0.916590917234747


In [54]:
# List the feature column names, in the order the model was trained on.
x.columns

Index(['Sale', 'weight', 'resoloution', 'ppi', 'cpu core', 'cpu freq',
       'internal mem', 'ram', 'RearCam', 'Front_Cam', 'battery', 'thickness'],
      dtype='object')

In [55]:
# Replace the spaces in three column names with underscores so that every
# feature name is a valid Python identifier.
# Reassigning (instead of inplace=True) keeps the cell free of in-place
# mutation and safe to re-run.
# NOTE: lin_reg was already fitted on x_train, which still carries the
# original space-separated names; only `x` is renamed here.
x = x.rename(columns={
    'cpu core': 'cpu_core',
    'cpu freq': 'cpu_freq',
    'internal mem': 'internal_mem',
})
x.columns

Index(['Sale', 'weight', 'resoloution', 'ppi', 'cpu_core', 'cpu_freq',
       'internal_mem', 'ram', 'RearCam', 'Front_Cam', 'battery', 'thickness'],
      dtype='object')

In [56]:
def cellphone_price_pred(Sale, weight, resoloution, ppi, cpu_core, cpu_freq,
                         internal_mem, ram, RearCam, Front_Cam, battery, thickness):
    """Predict the price of a single phone with the fitted ``lin_reg`` model.

    The twelve arguments are the model's features, given in the same order
    as the columns of the training frame (``Sale`` ... ``thickness``). The
    spelling ``resoloution`` is kept because it matches the dataset's
    column name.

    Returns:
        The first (and only) element of ``lin_reg.predict`` for this row.

    Relies on the module-level ``lin_reg`` estimator being fitted already.
    """
    # One row, twelve columns, in training-column order. Building the row
    # from the arguments avoids depending on the global frame `x` for its length.
    features = np.array(
        [[Sale, weight, resoloution, ppi, cpu_core, cpu_freq,
          internal_mem, ram, RearCam, Front_Cam, battery, thickness]],
        dtype=float,
    )

    return lin_reg.predict(features)[0]

In [57]:
# Show the first feature row; its values are reused in the prediction call below.
x.iloc[0]

Sale              10.00
weight           135.00
resoloution        5.20
ppi              424.00
cpu_core           8.00
cpu_freq           1.35
internal_mem      16.00
ram                3.00
RearCam           13.00
Front_Cam          8.00
battery         2610.00
thickness          7.40
Name: 0, dtype: float64

In [58]:
# Predict the price for the feature values of row 0, passed by keyword
# so each number is tied to its feature.
cellphone_price_pred(
    Sale=10, weight=135, resoloution=5.2, ppi=424, cpu_core=8, cpu_freq=1.35,
    internal_mem=16, ram=3, RearCam=13, Front_Cam=8, battery=2610, thickness=7.4,
)



2676.281368504958

In [60]:
# Serialise the fitted model to disk. `pickle` is already imported in the
# notebook's first cell, so it is not re-imported here.
# Only unpickle this file from a trusted source: loading a pickle can
# execute arbitrary code.
with open("lin_reg.pkl", "wb") as f:
    pickle.dump(lin_reg, f)

In [62]:
# Save the feature column names (in order) as JSON. `json` is already
# imported in the notebook's first cell.
# The dict gets its own name: assigning it to `cellphone_price_pred`
# would overwrite the prediction function defined earlier.
# The names written here are the renamed ones from `x` (e.g. 'cpu_core').
model_metadata = {'Columns': x.columns.tolist()}
with open("Cellphone_data.json", 'w') as f:
    json.dump(model_metadata, f)