In [96]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
import numpy as np
import pandas as pd
import os

In [7]:
# Set KAGGLE_CONFIG_DIR for this process to the local ./.kaggle directory.
# presumably the Kaggle CLI reads its credentials from there — verify against the CLI docs.
os.environ.update(KAGGLE_CONFIG_DIR="./.kaggle")

In [9]:
# Download the dataset archive with the Kaggle CLI. As the recorded output shows, the
# CLI skips the download when a more recently modified local copy already exists
# (pass --force to download again).
!kaggle datasets download -d gyanprakashkushwaha/laptop-price-prediction-cleaned-dataset

laptop-price-prediction-cleaned-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [14]:
!unzip laptop-price-prediction-cleaned-dataset.zip

Archive:  laptop-price-prediction-cleaned-dataset.zip
  inflating: laptop_data_cleaned.csv  


In [100]:
def get_score(model, x, y, cv=5):
    """Return the mean cross-validated test score of ``model`` on ``(x, y)``.

    Parameters
    ----------
    model : estimator
        Estimator handed to ``cross_validate``.
    x : array-like
        Feature matrix.
    y : array-like
        Target values.
    cv : int or cross-validation splitter, default=5
        Passed straight through to ``cross_validate``. The default keeps the
        previous hard-coded behaviour of five folds.

    Returns
    -------
    float
        Mean of the per-fold values stored under ``"test_score"``.
    """
    fold_results = cross_validate(model, x, y, cv=cv)
    return np.mean(fold_results["test_score"])

In [90]:
# Load the CSV that was extracted from the Kaggle archive into a DataFrame.
data = pd.read_csv(filepath_or_buffer="laptop_data_cleaned.csv")

# Preview the first five rows.
data.head(n=5)

Unnamed: 0,Company,TypeName,Ram,Weight,Price,TouchScreen,Ips,Ppi,Cpu_brand,HDD,SSD,Gpu_brand,Os
0,Apple,Ultrabook,8,1.37,11.175755,0,1,226.983005,Intel Core i5,0,128,Intel,Mac
1,Apple,Ultrabook,8,1.34,10.776777,0,0,127.67794,Intel Core i5,0,0,Intel,Mac
2,HP,Notebook,8,1.86,10.329931,0,0,141.211998,Intel Core i5,0,256,Intel,Others
3,Apple,Ultrabook,16,1.83,11.814476,0,1,220.534624,Intel Core i7,0,512,AMD,Mac
4,Apple,Ultrabook,8,1.37,11.473101,0,1,226.983005,Intel Core i5,0,256,Intel,Mac


In [91]:
# Replace each string-valued column with integer codes.
# One encoder is kept per column in `label_encoders`, so every mapping stays available
# (e.g. for inverse_transform) instead of being overwritten by the next fit.
# NOTE(review): the integer codes impose an arbitrary order on nominal categories;
# consider one-hot encoding for the SVR / linear models.
label_encoders = {}
for column in ["Company", "TypeName", "Cpu_brand", "Gpu_brand", "Os"]:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le
# After the loop `le` is the encoder fitted on "Os", exactly as before.

data.head()

Unnamed: 0,Company,TypeName,Ram,Weight,Price,TouchScreen,Ips,Ppi,Cpu_brand,HDD,SSD,Gpu_brand,Os
0,1,4,8,1.37,11.175755,0,1,226.983005,2,0,128,1,0
1,1,4,8,1.34,10.776777,0,0,127.67794,2,0,0,1,0
2,7,3,8,1.86,10.329931,0,0,141.211998,2,0,256,1,1
3,1,4,16,1.83,11.814476,0,1,220.534624,3,0,512,0,0
4,1,4,8,1.37,11.473101,0,1,226.983005,2,0,256,1,0


In [92]:
# Collect the distinct values present in the "Company" column.
companies = {company for company in data["Company"].tolist()}
companies

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}

In [93]:
# Rescale every column of `data` (the "Price" target included) with MinMaxScaler.
# NOTE(review): the scaler is fitted on all rows before any train/test split or
# cross-validation, so held-out rows influence the scaling — consider fitting it
# inside a Pipeline on the training folds only.
minmax = MinMaxScaler()
data_ = minmax.fit_transform(data)
# Rebuild the DataFrame with the original column labels AND the original row index,
# so rows stay aligned with anything keyed on that index.
data = pd.DataFrame(data_, columns=data.columns, index=data.index)
data.head()

Unnamed: 0,Company,TypeName,Ram,Weight,Price,TouchScreen,Ips,Ppi,Cpu_brand,HDD,SSD,Gpu_brand,Os
0,0.055556,0.8,0.096774,0.169576,0.573865,0.0,1.0,0.520844,0.5,0.0,0.125,0.5,0.0
1,0.055556,0.8,0.096774,0.162095,0.461693,0.0,0.0,0.141646,0.5,0.0,0.0,0.5,0.0
2,0.388889,0.6,0.096774,0.291771,0.336062,0.0,0.0,0.193326,0.5,0.0,0.25,0.5,0.5
3,0.055556,0.8,0.225806,0.284289,0.753442,0.0,1.0,0.496221,0.75,0.0,0.5,0.0,0.0
4,0.055556,0.8,0.096774,0.169576,0.657464,0.0,1.0,0.520844,0.5,0.0,0.25,0.5,0.0


In [86]:
# Disabled alternative to the MinMaxScaler cell: standardise every column with
# StandardScaler instead (note that the variable is still called `minmax`).
# NOTE(review): the table recorded below this cell looks like it came from an earlier
# run made while this code was active — confirm, then clear the stale output.
# minmax = StandardScaler()
# data_ = minmax.fit_transform(data)
# data = pd.DataFrame(data_,columns=data.columns)
# data.head()

Unnamed: 0,Company,TypeName,Ram,Weight,Price,TouchScreen,Ips,Ppi,Cpu_brand,HDD,SSD,Gpu_brand,Os
0,-1.334619,1.135843,-0.087852,-1.003171,0.561156,-0.41496,1.604943,1.865121,-0.435472,-0.798909,-0.312418,-0.269945,-4.588083
1,-1.334619,1.135843,-0.087852,-1.048016,-0.08306,-0.41496,-0.623075,-0.449147,-0.435472,-0.798909,-0.998899,-0.269945,-4.588083
2,0.132751,0.333441,-0.087852,-0.270711,-0.804569,-0.41496,-0.623075,-0.133741,-0.435472,-0.798909,0.374062,-0.269945,-2.104822
3,-1.334619,1.135843,1.48177,-0.315556,1.592481,-0.41496,1.604943,1.714844,0.585441,-0.798909,1.747024,-1.817871,-4.588083
4,-1.334619,1.135843,-0.087852,-1.003171,1.041273,-0.41496,1.604943,1.865121,-0.435472,-0.798909,0.374062,-0.269945,-4.588083


In [94]:
# Split the frame into the target ("Price") and the remaining feature columns.
y = data["Price"]
x = data.drop(columns="Price")

# Hold out half of the rows for evaluation. The fixed random_state makes the split,
# and therefore the reported score, reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=.5, random_state=42
)

# Fit a polynomial-kernel SVR on the training half and score it on the held-out half.
svr = SVR(kernel="poly")
svr.fit(x_train, y_train)
score = svr.score(x_test, y_test)
score

0.7905247850049989

In [95]:
# 20-fold cross-validation of the SVR on the full feature matrix; show the mean fold score.
result = cross_validate(estimator=svr, X=x, y=y, cv=20)
fold_scores = result["test_score"]
np.mean(fold_scores)

0.7977962297828778

In [118]:
# Depth-limited regression tree, evaluated with the shared cross-validation helper.
# random_state is fixed so the score is reproducible between runs; without it the
# tree can differ from run to run.
dec_reg = DecisionTreeRegressor(max_depth=10, random_state=42)
score = get_score(dec_reg, x, y)
score

0.7826533851857398

In [129]:
# Expand the features into degree-2 polynomial terms.
plf = PolynomialFeatures(degree=2)
x_poly = plf.fit_transform(x)

# Ordinary least squares on the expanded features, scored with the shared CV helper.
lin_reg = LinearRegression()
score = get_score(lin_reg, x_poly, y)
score

0.78908716327485