In [74]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [75]:
# Load the car-price data set; the path is relative to the notebook's working directory.
df = pd.read_csv("carprices.csv")
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [76]:
# One indicator column per distinct value of 'Car Model'.
dummies = pd.get_dummies(df['Car Model'])
# Place the indicator columns side by side with the original columns.
merged_df = pd.concat([df, dummies], axis=1)
# Remove the raw categorical column (now encoded) and the target column,
# leaving only candidate predictor columns.
final = merged_df.drop(columns=['Car Model', 'Sell Price($)'])

In [77]:
# Target vector: the selling price.
y = df['Sell Price($)']
# Predictors: leave out the "BMW X5" indicator so that model acts as the
# baseline category (avoids perfectly collinear dummy columns).
X = final.drop(columns="BMW X5")

In [78]:
# Linear regression estimator constructed with its default arguments.
model = LinearRegression()

In [79]:
# Fit on the dummy-encoded predictors X against the selling price y.
model.fit(X,y)

LinearRegression()

In [80]:
# Predict the price for a single car. The model was fitted on a DataFrame, so
# the query is labelled with the same columns (X.columns) instead of being
# passed as a bare list: this documents what each number means and keeps the
# feature names consistent between fit and predict.
# Values follow the column order of X as built in the preceding cells.
query = pd.DataFrame([[45000, 4, 0, 1]], columns=X.columns)
model.predict(query)

array([36991.31721031])

In [81]:
# Score on the same X, y the model was fitted with, so this is an in-sample figure.
model.score(X,y)

0.9417050937281082

''' --------------------------------------------- '''

# With the ColumnTransformer

In [82]:
# One-hot encode the column at position 0 and pass the remaining columns
# through unchanged.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [0])],
    remainder='passthrough',
)
# Target: the selling price.
y = df['Sell Price($)']
# 'Car Model' is listed first so that it sits at position 0 for the encoder;
# the transformed result is materialised as a NumPy array.
X = np.array(ct.fit_transform(df[['Car Model','Age(yrs)','Mileage']]))
X

array([[0.00e+00, 1.00e+00, 0.00e+00, 6.00e+00, 6.90e+04],
       [0.00e+00, 1.00e+00, 0.00e+00, 3.00e+00, 3.50e+04],
       [0.00e+00, 1.00e+00, 0.00e+00, 5.00e+00, 5.70e+04],
       [0.00e+00, 1.00e+00, 0.00e+00, 2.00e+00, 2.25e+04],
       [0.00e+00, 1.00e+00, 0.00e+00, 4.00e+00, 4.60e+04],
       [1.00e+00, 0.00e+00, 0.00e+00, 5.00e+00, 5.90e+04],
       [1.00e+00, 0.00e+00, 0.00e+00, 5.00e+00, 5.20e+04],
       [1.00e+00, 0.00e+00, 0.00e+00, 6.00e+00, 7.20e+04],
       [1.00e+00, 0.00e+00, 0.00e+00, 8.00e+00, 9.10e+04],
       [0.00e+00, 0.00e+00, 1.00e+00, 6.00e+00, 6.70e+04],
       [0.00e+00, 0.00e+00, 1.00e+00, 7.00e+00, 8.30e+04],
       [0.00e+00, 0.00e+00, 1.00e+00, 7.00e+00, 7.90e+04],
       [0.00e+00, 0.00e+00, 1.00e+00, 5.00e+00, 5.90e+04]])

In [83]:
new_model = LinearRegression()
# Rebuild the design matrix from the already-fitted transformer rather than
# slicing X in place: `X = X[:, 1:]` strips one more column every time this
# cell is re-run, whereas this expression always produces the same matrix.
# [:, 1:] drops the first one-hot column so the remaining dummies are not
# perfectly collinear (dummy-variable trap).
X = np.array(ct.transform(df[['Car Model','Age(yrs)','Mileage']]))[:, 1:]
new_model.fit(X,y)

LinearRegression()

In [84]:
# Score on the same X, y used to fit new_model (in-sample figure).
new_model.score(X,y)

0.9417050937281083