# Test All Possible Regressions in sklearn

This project lists and compares the performance of different regression models on the cars dataset.

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import all_estimators

In [2]:
# Read the raw cars table and keep it untouched under its own name.
df_original = pd.read_csv("datasets/cars.csv")

# Work on a new frame without the Car_ID column.
df = df_original.drop(columns=["Car_ID"])
df.head()

Unnamed: 0,Brand,Model,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,Price
0,Toyota,Corolla,2018,50000,Petrol,Manual,First,15,1498,108,5,800000
1,Honda,Civic,2019,40000,Petrol,Automatic,Second,17,1597,140,5,1000000
2,Ford,Mustang,2017,20000,Petrol,Automatic,First,10,4951,395,4,2500000
3,Maruti,Swift,2020,30000,Diesel,Manual,Third,23,1248,74,5,600000
4,Hyundai,Sonata,2016,60000,Diesel,Automatic,Second,18,1999,194,5,850000


In [3]:
# Columns that hold text labels in the preview above and therefore need to be
# converted to integer codes before they can be fed to the regressors.
CATEGORICAL_COLUMNS = ["Brand", "Model", "Fuel_Type", "Transmission", "Owner_Type"]

# One fitted encoder per column, keyed by column name, so that the integer
# codes can be mapped back to the original labels later.
label_encoders = {}
for column in CATEGORICAL_COLUMNS:
    encoder = LabelEncoder()
    # fit_transform learns the label set and returns the encoded values in one step.
    df[column] = encoder.fit_transform(df[column])
    label_encoders[column] = encoder

# Keep the individual encoder names available for any cell that refers to them.
le_brand = label_encoders["Brand"]
le_model = label_encoders["Model"]
le_fuel_type = label_encoders["Fuel_Type"]
le_transmission = label_encoders["Transmission"]
le_owner_type = label_encoders["Owner_Type"]

In [4]:
# Preview the frame again: the categorical columns now hold integer codes.
df.head()

Unnamed: 0,Brand,Model,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,Price
0,9,15,2018,50000,1,1,0,15,1498,108,5,800000
1,3,14,2019,40000,1,0,1,17,1597,140,5,1000000
2,2,30,2017,20000,1,0,0,10,4951,395,4,2500000
3,6,42,2020,30000,0,1,2,23,1248,74,5,600000
4,4,41,2016,60000,0,0,1,18,1999,194,5,850000


In [5]:
# Features: every column except the target.
X = df.drop(columns=["Price"])

# Target: Price, kept as a single-column DataFrame.
y = df["Price"].to_frame()

In [6]:
# Hold out 33% of the rows for testing; the fixed random_state makes the
# shuffled split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
    shuffle=True,
)

In [7]:
# Collect every regressor that scikit-learn exposes, as (name, class) pairs.
# `all_estimators` is imported in the import cell at the top of the notebook.
regression_classes = all_estimators(type_filter="regressor")

In [None]:
# (name, train score, test score) for every regressor that ran successfully.
# Scores are the estimator's score() value (R^2 for scikit-learn regressors)
# multiplied by 100.
test_results = []

# (name, error) for every regressor that could not be built, fitted, or scored.
# Failures are recorded instead of silently dropped so the results table can be
# read with full knowledge of what is missing from it.
failed_estimators = []

# fit() is given a flat 1-D target; compute it once rather than on every iteration.
y_train_1d = y_train.values.ravel()

for name, class_ in regression_classes:
    try:
        # Every estimator is built with its default arguments.
        # NOTE(review): classes that require constructor arguments presumably
        # fail here and end up in `failed_estimators` -- confirm from its contents.
        regr = class_()
        regr.fit(X_train, y_train_1d)
        train_score = regr.score(X_train, y_train) * 100
        test_score = regr.score(X_test, y_test) * 100
    except Exception as e:
        # Best-effort sweep: one failing estimator must not abort the rest.
        failed_estimators.append((name, f"{type(e).__name__}: {e}"))
    else:
        test_results.append((name, train_score, test_score))

# Show which estimators were skipped, and why.
pd.DataFrame(failed_estimators, columns=["Name", "Error"])

In [9]:
# Display settings: show every row and print floats with two decimals and
# thousands separators.
pd.set_option("display.max_rows", None)
pd.set_option("display.float_format", "{:,.2f}".format)

# One row per evaluated regressor.
table = pd.DataFrame(
    test_results,
    columns=["Name", "Train Score", "Test Score"],
)

# Rank the regressors from best to worst test score.
(
    table
    .set_index("Name")
    .sort_values("Test Score", ascending=False)
)

Unnamed: 0_level_0,Train Score,Test Score
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
ExtraTreesRegressor,100.0,93.13
RandomForestRegressor,98.9,90.85
BaggingRegressor,97.42,89.99
GradientBoostingRegressor,99.98,89.3
AdaBoostRegressor,98.31,86.94
HistGradientBoostingRegressor,95.08,85.96
ExtraTreeRegressor,100.0,82.89
DecisionTreeRegressor,100.0,82.08
PLSRegression,83.39,81.18
ElasticNet,81.59,79.99
