# Big Mart Sales 3

In [1]:
import pandas as pd
import numpy as np

In [2]:
train_df = pd.read_csv('Train.txt')
test_df = pd.read_csv('Test.txt')
train_df["data"] = 'train'
test_df["Item_Outlet_Sales"] = np.nan
test_df["data"] = 'test'

In [3]:
sub = pd.DataFrame({'Item_Identifier': test_df['Item_Identifier'], 'Outlet_Identifier': test_df['Outlet_Identifier']})

In [4]:
df = pd.concat([train_df, test_df], axis=0)
df.head()

Unnamed: 0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,Outlet_Type,Item_Outlet_Sales,data
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,Supermarket Type1,3735.138,train
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,Supermarket Type2,443.4228,train
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,Supermarket Type1,2097.27,train
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,Grocery Store,732.38,train
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,Supermarket Type1,994.7052,train


In [5]:
df.drop(['Item_Identifier', 'Outlet_Identifier'], axis=1, inplace=True)

In [6]:
df["Item_Weight"].fillna(df["Item_Weight"].median(), inplace=True)
df['Outlet_Size'].fillna(df['Outlet_Size'].mode()[0], inplace=True)

In [7]:
object_columns = df.select_dtypes('object').columns
object_columns = object_columns[:-1]
object_columns

Index(['Item_Fat_Content', 'Item_Type', 'Outlet_Size', 'Outlet_Location_Type',
       'Outlet_Type'],
      dtype='object')

In [8]:
df = pd.get_dummies(df, columns=object_columns, prefix=object_columns, drop_first=True)
print(df.shape)
df.head()

(14204, 32)


Unnamed: 0,Item_Weight,Item_Visibility,Item_MRP,Outlet_Establishment_Year,Item_Outlet_Sales,data,Item_Fat_Content_Low Fat,Item_Fat_Content_Regular,Item_Fat_Content_low fat,Item_Fat_Content_reg,...,Item_Type_Snack Foods,Item_Type_Soft Drinks,Item_Type_Starchy Foods,Outlet_Size_Medium,Outlet_Size_Small,Outlet_Location_Type_Tier 2,Outlet_Location_Type_Tier 3,Outlet_Type_Supermarket Type1,Outlet_Type_Supermarket Type2,Outlet_Type_Supermarket Type3
0,9.3,0.016047,249.8092,1999,3735.138,train,1,0,0,0,...,0,0,0,1,0,0,0,1,0,0
1,5.92,0.019278,48.2692,2009,443.4228,train,0,1,0,0,...,0,1,0,1,0,0,1,0,1,0
2,17.5,0.01676,141.618,1999,2097.27,train,1,0,0,0,...,0,0,0,1,0,0,0,1,0,0
3,19.2,0.0,182.095,1998,732.38,train,0,1,0,0,...,0,0,0,1,0,0,1,0,0,0
4,8.93,0.0,53.8614,1987,994.7052,train,1,0,0,0,...,0,0,0,0,0,0,1,1,0,0


In [9]:
train_df = df.loc[df["data"]=='train']
test_df = df.loc[df["data"]=='test']
del train_df['data']
test_df.drop(['data', 'Item_Outlet_Sales'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


# Model Building

In [10]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from math import *
from sklearn.linear_model import LinearRegression

In [11]:
train_sub, test_sub = train_test_split(train_df, test_size=0.2, random_state=42)
train_sub.shape, test_sub.shape

((6818, 31), (1705, 31))

In [12]:
x_train = train_sub.drop('Item_Outlet_Sales', axis=1)
y_train = train_sub['Item_Outlet_Sales']
x_test = test_sub.drop('Item_Outlet_Sales', axis=1)
y_test = test_sub['Item_Outlet_Sales']

In [13]:
model = LinearRegression()

In [14]:
model.fit(x_train, y_train**(1/3))

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [15]:
y_pred = model.predict(x_test)**3

In [16]:
r2_score(y_test, y_pred)

0.6004842946803881

In [17]:
rmse = sqrt(mean_squared_error(y_test, y_pred))
rmse

1042.0516917130226

In [18]:
X = train_df.drop('Item_Outlet_Sales', axis=1)
Y = train_df['Item_Outlet_Sales']

In [19]:
model.fit(X, Y**(1/3))

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [20]:
Y_pred = model.predict(test_df)**3

In [21]:
sub['Item_Outlet_Sales'] = Y_pred
sub.to_csv('SampleSubmission.csv', index=False)  # 1183

# Ridge

In [22]:
from sklearn.linear_model import Lasso, Ridge

In [23]:
ridge = Ridge()
ridge.fit(x_train, y_train**(1/3))

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [24]:
ridge.coef_

array([-5.26954133e-03, -7.44652008e-01,  3.17282175e-02,  5.28921483e-02,
        2.32098203e-02,  1.09786151e-01,  1.64386272e-01, -1.27544606e-01,
        6.61564233e-03, -5.38326608e-02,  7.89473727e-02, -2.54460959e-01,
       -1.09177926e-01,  5.80023746e-02,  2.82905919e-02,  1.13869546e-01,
       -3.08306734e-02,  2.41153771e-02,  1.50088100e-03,  2.79714440e-01,
       -2.67132553e-03, -9.90141393e-02, -7.71222321e-02, -1.26356536e+00,
       -1.15762251e+00, -2.87943412e-01, -6.17091444e-01,  5.26347935e+00,
        4.68654403e+00,  8.91191419e+00])

In [25]:
y_pred = ridge.predict(x_test)**3

In [26]:
r2_score(y_test, y_pred)

0.6000560287041945

In [27]:
rmse = sqrt(mean_squared_error(y_test, y_pred))
rmse

1042.6100624422427

# Lasso

In [28]:
lasso = Lasso()

In [29]:
lasso.fit(x_train, y_train**(1/2))

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [30]:
lasso.coef_

array([-0.        , -0.        ,  0.16584542,  0.18402908, -0.        ,
        0.        , -0.        , -0.        , -0.        , -0.        ,
        0.        , -0.        , -0.        ,  0.        ,  0.        ,
        0.        , -0.        , -0.        , -0.        ,  0.        ,
        0.        , -0.        ,  0.        ,  0.        , -0.        ,
        0.        ,  0.        ,  6.79658739,  0.        , 14.75610532])

In [31]:
y_pred = lasso.predict(x_test)**2

In [32]:
r2_score(y_test, y_pred)

0.46984518919520746

In [33]:
rmse = sqrt(mean_squared_error(y_test, y_pred))
rmse

1200.3936464511455

In [34]:
lasso.fit(X,Y**(1/2))

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [35]:
Y_pred = lasso.predict(test_df)**2

In [36]:
sub['Item_Outlet_Sales'] = Y_pred
sub.to_csv('SampleSubmission.csv', index=False)

# Grid Seacrh

In [37]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [38]:
params = {'alpha':np.linspace(3,4,100)}
grid = GridSearchCV(ridge,param_grid=params,cv= 5,verbose = 2)

In [39]:
import time
start = time.time()
grid.fit(x_train, y_train**(1/2))
end = time.time()
print(end-start)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV] alpha=3.0 .......................................................
[CV] ........................................ alpha=3.0, total=   0.0s
[CV] alpha=3.0 .......................................................
[CV] ........................................ alpha=3.0, total=   0.0s
[CV] alpha=3.0 .......................................................
[CV] ........................................ alpha=3.0, total=   0.0s
[CV] alpha=3.0 .......................................................
[CV] ........................................ alpha=3.0, total=   0.0s
[CV] alpha=3.0 .......................................................
[CV] ........................................ alpha=3.0, total=   0.0s
[CV] alpha=3.01010101010101 ..........................................
[CV] ........................... alpha=3.01010101010101, total=   0.0s
[CV] alpha=3.01010101010101 ..........................................
[CV] .........

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s



[CV] alpha=3.0202020202020203 ........................................
[CV] ......................... alpha=3.0202020202020203, total=   0.0s
[CV] alpha=3.0202020202020203 ........................................
[CV] ......................... alpha=3.0202020202020203, total=   0.0s
[CV] alpha=3.0303030303030303 ........................................
[CV] ......................... alpha=3.0303030303030303, total=   0.0s
[CV] alpha=3.0303030303030303 ........................................
[CV] ......................... alpha=3.0303030303030303, total=   0.0s
[CV] alpha=3.0303030303030303 ........................................
[CV] ......................... alpha=3.0303030303030303, total=   0.0s
[CV] alpha=3.0303030303030303 ........................................
[CV] ......................... alpha=3.0303030303030303, total=   0.0s
[CV] alpha=3.0303030303030303 ........................................
[CV] ......................... alpha=3.0303030303030303, total=   0.0s
[CV] 

[CV] ......................... alpha=3.1616161616161618, total=   0.0s
[CV] alpha=3.1616161616161618 ........................................
[CV] ......................... alpha=3.1616161616161618, total=   0.0s
[CV] alpha=3.1616161616161618 ........................................
[CV] ......................... alpha=3.1616161616161618, total=   0.0s
[CV] alpha=3.1616161616161618 ........................................
[CV] ......................... alpha=3.1616161616161618, total=   0.0s
[CV] alpha=3.1616161616161618 ........................................
[CV] ......................... alpha=3.1616161616161618, total=   0.0s
[CV] alpha=3.1717171717171717 ........................................
[CV] ......................... alpha=3.1717171717171717, total=   0.0s
[CV] alpha=3.1717171717171717 ........................................
[CV] ......................... alpha=3.1717171717171717, total=   0.0s
[CV] alpha=3.1717171717171717 ........................................
[CV] .

[CV] .......................... alpha=3.292929292929293, total=   0.0s
[CV] alpha=3.292929292929293 .........................................
[CV] .......................... alpha=3.292929292929293, total=   0.0s
[CV] alpha=3.292929292929293 .........................................
[CV] .......................... alpha=3.292929292929293, total=   0.0s
[CV] alpha=3.292929292929293 .........................................
[CV] .......................... alpha=3.292929292929293, total=   0.0s
[CV] alpha=3.292929292929293 .........................................
[CV] .......................... alpha=3.292929292929293, total=   0.0s
[CV] alpha=3.303030303030303 .........................................
[CV] .......................... alpha=3.303030303030303, total=   0.0s
[CV] alpha=3.303030303030303 .........................................
[CV] .......................... alpha=3.303030303030303, total=   0.0s
[CV] alpha=3.303030303030303 .........................................
[CV] .

[CV] ......................... alpha=3.4242424242424243, total=   0.0s
[CV] alpha=3.4242424242424243 ........................................
[CV] ......................... alpha=3.4242424242424243, total=   0.0s
[CV] alpha=3.4242424242424243 ........................................
[CV] ......................... alpha=3.4242424242424243, total=   0.0s
[CV] alpha=3.4242424242424243 ........................................
[CV] ......................... alpha=3.4242424242424243, total=   0.0s
[CV] alpha=3.4242424242424243 ........................................
[CV] ......................... alpha=3.4242424242424243, total=   0.0s
[CV] alpha=3.4343434343434343 ........................................
[CV] ......................... alpha=3.4343434343434343, total=   0.0s
[CV] alpha=3.4343434343434343 ........................................
[CV] ......................... alpha=3.4343434343434343, total=   0.0s
[CV] alpha=3.4343434343434343 ........................................
[CV] .

[CV] ......................... alpha=3.5454545454545454, total=   0.0s
[CV] alpha=3.5454545454545454 ........................................
[CV] ......................... alpha=3.5454545454545454, total=   0.0s
[CV] alpha=3.5454545454545454 ........................................
[CV] ......................... alpha=3.5454545454545454, total=   0.0s
[CV] alpha=3.5454545454545454 ........................................
[CV] ......................... alpha=3.5454545454545454, total=   0.0s
[CV] alpha=3.5555555555555554 ........................................
[CV] ......................... alpha=3.5555555555555554, total=   0.0s
[CV] alpha=3.5555555555555554 ........................................
[CV] ......................... alpha=3.5555555555555554, total=   0.0s
[CV] alpha=3.5555555555555554 ........................................
[CV] ......................... alpha=3.5555555555555554, total=   0.0s
[CV] alpha=3.5555555555555554 ........................................
[CV] .

[CV] .......................... alpha=3.676767676767677, total=   0.0s
[CV] alpha=3.676767676767677 .........................................
[CV] .......................... alpha=3.676767676767677, total=   0.0s
[CV] alpha=3.676767676767677 .........................................
[CV] .......................... alpha=3.676767676767677, total=   0.0s
[CV] alpha=3.686868686868687 .........................................
[CV] .......................... alpha=3.686868686868687, total=   0.0s
[CV] alpha=3.686868686868687 .........................................
[CV] .......................... alpha=3.686868686868687, total=   0.0s
[CV] alpha=3.686868686868687 .........................................
[CV] .......................... alpha=3.686868686868687, total=   0.0s
[CV] alpha=3.686868686868687 .........................................
[CV] .......................... alpha=3.686868686868687, total=   0.0s
[CV] alpha=3.686868686868687 .........................................
[CV] .

[CV] alpha=3.808080808080808 .........................................
[CV] .......................... alpha=3.808080808080808, total=   0.0s
[CV] alpha=3.8181818181818183 ........................................
[CV] ......................... alpha=3.8181818181818183, total=   0.0s
[CV] alpha=3.8181818181818183 ........................................
[CV] ......................... alpha=3.8181818181818183, total=   0.0s
[CV] alpha=3.8181818181818183 ........................................
[CV] ......................... alpha=3.8181818181818183, total=   0.0s
[CV] alpha=3.8181818181818183 ........................................
[CV] ......................... alpha=3.8181818181818183, total=   0.0s
[CV] alpha=3.8181818181818183 ........................................
[CV] ......................... alpha=3.8181818181818183, total=   0.0s
[CV] alpha=3.8282828282828283 ........................................
[CV] ......................... alpha=3.8282828282828283, total=   0.0s
[CV] a

[CV] ......................... alpha=3.9393939393939394, total=   0.0s
[CV] alpha=3.9393939393939394 ........................................
[CV] ......................... alpha=3.9393939393939394, total=   0.0s
[CV] alpha=3.9393939393939394 ........................................
[CV] ......................... alpha=3.9393939393939394, total=   0.0s
[CV] alpha=3.9393939393939394 ........................................
[CV] ......................... alpha=3.9393939393939394, total=   0.0s
[CV] alpha=3.9393939393939394 ........................................
[CV] ......................... alpha=3.9393939393939394, total=   0.0s
[CV] alpha=3.94949494949495 ..........................................
[CV] ........................... alpha=3.94949494949495, total=   0.0s
[CV] alpha=3.94949494949495 ..........................................
[CV] ........................... alpha=3.94949494949495, total=   0.0s
[CV] alpha=3.94949494949495 ..........................................
[CV] .

[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    7.6s finished


In [40]:
y_pred = grid.predict(x_test)**2

In [41]:
r2_score(y_test, y_pred)

0.6040314911209339

In [42]:
rmse = (np.mean((y_test - y_pred)**2))**0.5
rmse

1037.4153239933173

In [43]:
rmse = sqrt(mean_squared_error(y_test, y_pred))
rmse

1037.4153239933173

In [44]:
grid.fit(X, Y**(1/2))

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV] alpha=3.0 .......................................................
[CV] ........................................ alpha=3.0, total=   0.0s
[CV] alpha=3.0 .......................................................
[CV] ........................................ alpha=3.0, total=   0.0s
[CV] alpha=3.0 .......................................................
[CV] ........................................ alpha=3.0, total=   0.0s
[CV] alpha=3.0 .......................................................
[CV] ........................................ alpha=3.0, total=   0.0s
[CV] alpha=3.0 .......................................................
[CV] ........................................ alpha=3.0, total=   0.0s
[CV] alpha=3.01010101010101 ..........................................
[CV] ........................... alpha=3.01010101010101, total=   0.0s
[CV] alpha=3.01010101010101 ..........................................
[CV] .........

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s



[CV] alpha=3.0202020202020203 ........................................
[CV] ......................... alpha=3.0202020202020203, total=   0.0s
[CV] alpha=3.0202020202020203 ........................................
[CV] ......................... alpha=3.0202020202020203, total=   0.0s
[CV] alpha=3.0202020202020203 ........................................
[CV] ......................... alpha=3.0202020202020203, total=   0.0s
[CV] alpha=3.0202020202020203 ........................................
[CV] ......................... alpha=3.0202020202020203, total=   0.0s
[CV] alpha=3.0202020202020203 ........................................
[CV] ......................... alpha=3.0202020202020203, total=   0.0s
[CV] alpha=3.0303030303030303 ........................................
[CV] ......................... alpha=3.0303030303030303, total=   0.0s
[CV] alpha=3.0303030303030303 ........................................
[CV] ......................... alpha=3.0303030303030303, total=   0.0s
[CV] 

[CV] ......................... alpha=3.1313131313131315, total=   0.0s
[CV] alpha=3.1313131313131315 ........................................
[CV] ......................... alpha=3.1313131313131315, total=   0.0s
[CV] alpha=3.1414141414141414 ........................................
[CV] ......................... alpha=3.1414141414141414, total=   0.0s
[CV] alpha=3.1414141414141414 ........................................
[CV] ......................... alpha=3.1414141414141414, total=   0.0s
[CV] alpha=3.1414141414141414 ........................................
[CV] ......................... alpha=3.1414141414141414, total=   0.0s
[CV] alpha=3.1414141414141414 ........................................
[CV] ......................... alpha=3.1414141414141414, total=   0.0s
[CV] alpha=3.1414141414141414 ........................................
[CV] ......................... alpha=3.1414141414141414, total=   0.0s
[CV] alpha=3.1515151515151514 ........................................
[CV] .

[CV] ......................... alpha=3.2525252525252526, total=   0.0s
[CV] alpha=3.2525252525252526 ........................................
[CV] ......................... alpha=3.2525252525252526, total=   0.0s
[CV] alpha=3.2626262626262625 ........................................
[CV] ......................... alpha=3.2626262626262625, total=   0.0s
[CV] alpha=3.2626262626262625 ........................................
[CV] ......................... alpha=3.2626262626262625, total=   0.0s
[CV] alpha=3.2626262626262625 ........................................
[CV] ......................... alpha=3.2626262626262625, total=   0.0s
[CV] alpha=3.2626262626262625 ........................................
[CV] ......................... alpha=3.2626262626262625, total=   0.0s
[CV] alpha=3.2626262626262625 ........................................
[CV] ......................... alpha=3.2626262626262625, total=   0.0s
[CV] alpha=3.272727272727273 .........................................
[CV] .

[CV] .......................... alpha=3.393939393939394, total=   0.0s
[CV] alpha=3.393939393939394 .........................................
[CV] .......................... alpha=3.393939393939394, total=   0.0s
[CV] alpha=3.393939393939394 .........................................
[CV] .......................... alpha=3.393939393939394, total=   0.0s
[CV] alpha=3.393939393939394 .........................................
[CV] .......................... alpha=3.393939393939394, total=   0.0s
[CV] alpha=3.404040404040404 .........................................
[CV] .......................... alpha=3.404040404040404, total=   0.0s
[CV] alpha=3.404040404040404 .........................................
[CV] .......................... alpha=3.404040404040404, total=   0.0s
[CV] alpha=3.404040404040404 .........................................
[CV] .......................... alpha=3.404040404040404, total=   0.0s
[CV] alpha=3.404040404040404 .........................................
[CV] .

[CV] alpha=3.515151515151515 .........................................
[CV] .......................... alpha=3.515151515151515, total=   0.0s
[CV] alpha=3.515151515151515 .........................................
[CV] .......................... alpha=3.515151515151515, total=   0.0s
[CV] alpha=3.515151515151515 .........................................
[CV] .......................... alpha=3.515151515151515, total=   0.0s
[CV] alpha=3.515151515151515 .........................................
[CV] .......................... alpha=3.515151515151515, total=   0.0s
[CV] alpha=3.525252525252525 .........................................
[CV] .......................... alpha=3.525252525252525, total=   0.0s
[CV] alpha=3.525252525252525 .........................................
[CV] .......................... alpha=3.525252525252525, total=   0.0s
[CV] alpha=3.525252525252525 .........................................
[CV] .......................... alpha=3.525252525252525, total=   0.0s
[CV] a

[CV] ......................... alpha=3.6363636363636367, total=   0.0s
[CV] alpha=3.6363636363636367 ........................................
[CV] ......................... alpha=3.6363636363636367, total=   0.0s
[CV] alpha=3.6363636363636367 ........................................
[CV] ......................... alpha=3.6363636363636367, total=   0.0s
[CV] alpha=3.6363636363636367 ........................................
[CV] ......................... alpha=3.6363636363636367, total=   0.0s
[CV] alpha=3.6464646464646466 ........................................
[CV] ......................... alpha=3.6464646464646466, total=   0.0s
[CV] alpha=3.6464646464646466 ........................................
[CV] ......................... alpha=3.6464646464646466, total=   0.0s
[CV] alpha=3.6464646464646466 ........................................
[CV] ......................... alpha=3.6464646464646466, total=   0.0s
[CV] alpha=3.6464646464646466 ........................................
[CV] .

[CV] .......................... alpha=3.757575757575758, total=   0.0s
[CV] alpha=3.757575757575758 .........................................
[CV] .......................... alpha=3.757575757575758, total=   0.0s
[CV] alpha=3.757575757575758 .........................................
[CV] .......................... alpha=3.757575757575758, total=   0.0s
[CV] alpha=3.7676767676767677 ........................................
[CV] ......................... alpha=3.7676767676767677, total=   0.0s
[CV] alpha=3.7676767676767677 ........................................
[CV] ......................... alpha=3.7676767676767677, total=   0.0s
[CV] alpha=3.7676767676767677 ........................................
[CV] ......................... alpha=3.7676767676767677, total=   0.0s
[CV] alpha=3.7676767676767677 ........................................
[CV] ......................... alpha=3.7676767676767677, total=   0.0s
[CV] alpha=3.7676767676767677 ........................................
[CV] .

[CV] .......................... alpha=3.878787878787879, total=   0.0s
[CV] alpha=3.878787878787879 .........................................
[CV] .......................... alpha=3.878787878787879, total=   0.0s
[CV] alpha=3.888888888888889 .........................................
[CV] .......................... alpha=3.888888888888889, total=   0.0s
[CV] alpha=3.888888888888889 .........................................
[CV] .......................... alpha=3.888888888888889, total=   0.0s
[CV] alpha=3.888888888888889 .........................................
[CV] .......................... alpha=3.888888888888889, total=   0.0s
[CV] alpha=3.888888888888889 .........................................
[CV] .......................... alpha=3.888888888888889, total=   0.0s
[CV] alpha=3.888888888888889 .........................................
[CV] .......................... alpha=3.888888888888889, total=   0.0s
[CV] alpha=3.8989898989898992 ........................................
[CV] .

[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    8.2s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=Ridge(alpha=1.0, copy_X=True, fit_intercept=True,
                             max_iter=None, normalize=False, random_state=None,
                             solver='auto', tol=0.001),
             iid='warn', n_jobs=None,
             param_grid={'alpha': array([3.        , 3.01010101, 3.02020202, 3.03030303, 3.04040404,
       3.05050505, 3.06060606, 3.07070707, 3.08080808, 3.09090909,
       3.1010101 , 3.111111...
       3.75757576, 3.76767677, 3.77777778, 3.78787879, 3.7979798 ,
       3.80808081, 3.81818182, 3.82828283, 3.83838384, 3.84848485,
       3.85858586, 3.86868687, 3.87878788, 3.88888889, 3.8989899 ,
       3.90909091, 3.91919192, 3.92929293, 3.93939394, 3.94949495,
       3.95959596, 3.96969697, 3.97979798, 3.98989899, 4.        ])},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=2)

In [45]:
Y_pred = grid.predict(test_df)**2

In [46]:
sub['Item_Outlet_Sales'] = Y_pred
sub.to_csv('SampleSubmission.csv', index=False)  # 1178

# Decision Tree

In [47]:
from sklearn.tree import DecisionTreeRegressor

In [48]:
dtree = DecisionTreeRegressor()

In [49]:
dtree.fit(x_train, y_train)

DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
                      max_leaf_nodes=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      presort=False, random_state=None, splitter='best')

In [50]:
params = {'criterion':['mse'],
        'max_depth':[None,2,3,4,5,6,7],
            'min_samples_leaf':[15], 
            'min_samples_split':[29,30,31,32,33,34]}

In [51]:
random = RandomizedSearchCV(dtree, param_distributions=params,cv=5, n_iter=10, random_state=42)

In [52]:
random.fit(x_train, y_train)

RandomizedSearchCV(cv=5, error_score='raise-deprecating',
                   estimator=DecisionTreeRegressor(criterion='mse',
                                                   max_depth=None,
                                                   max_features=None,
                                                   max_leaf_nodes=None,
                                                   min_impurity_decrease=0.0,
                                                   min_impurity_split=None,
                                                   min_samples_leaf=1,
                                                   min_samples_split=2,
                                                   min_weight_fraction_leaf=0.0,
                                                   presort=False,
                                                   random_state=None,
                                                   splitter='best'),
                   iid='warn', n_iter=10, n_jobs=None,
                   param_di

In [53]:
random.best_estimator_

DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=None,
                      max_leaf_nodes=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=15,
                      min_samples_split=30, min_weight_fraction_leaf=0.0,
                      presort=False, random_state=None, splitter='best')

In [54]:
y_pred = random.predict(x_test)

In [55]:
r2_score(y_test, y_pred)

0.607590071331845

In [56]:
random.score(x_test,y_test)

0.607590071331845

In [57]:
rmse = sqrt(mean_squared_error(y_test, y_pred))
rmse

1032.74316263205

In [58]:
random.fit(X, Y)

RandomizedSearchCV(cv=5, error_score='raise-deprecating',
                   estimator=DecisionTreeRegressor(criterion='mse',
                                                   max_depth=None,
                                                   max_features=None,
                                                   max_leaf_nodes=None,
                                                   min_impurity_decrease=0.0,
                                                   min_impurity_split=None,
                                                   min_samples_leaf=1,
                                                   min_samples_split=2,
                                                   min_weight_fraction_leaf=0.0,
                                                   presort=False,
                                                   random_state=None,
                                                   splitter='best'),
                   iid='warn', n_iter=10, n_jobs=None,
                   param_di

In [59]:
Y_pred = random.predict(test_df)

In [60]:
sub['Item_Outlet_Sales'] = Y_pred
sub.to_csv('SampleSubmission.csv', index=False)

# Random Forest

In [61]:
from sklearn.ensemble import RandomForestRegressor

In [62]:
rf = RandomForestRegressor()

In [64]:
param_dist = {"n_estimators":[200],
#               "max_features": [6,7],
#               "bootstrap": [True, False],
#                 'max_depth':[6,7],
#                 'min_samples_leaf':[3,4], 
#                 'min_samples_split':[3,4]
                  }

In [65]:
random_search = RandomizedSearchCV(rf, param_distributions=param_dist,
                                   n_iter=10,cv=5)
random_search.fit(x_train, y_train)



RandomizedSearchCV(cv=5, error_score='raise-deprecating',
                   estimator=RandomForestRegressor(bootstrap=True,
                                                   criterion='mse',
                                                   max_depth=None,
                                                   max_features='auto',
                                                   max_leaf_nodes=None,
                                                   min_impurity_decrease=0.0,
                                                   min_impurity_split=None,
                                                   min_samples_leaf=1,
                                                   min_samples_split=2,
                                                   min_weight_fraction_leaf=0.0,
                                                   n_estimators='warn',
                                                   n_jobs=None, oob_score=False,
                                                   random_state=N

In [66]:
y_pred = random.predict(x_test)

In [67]:
r2_score(y_test, y_pred)

0.6203752137234622

In [68]:
random.score(x_test, y_test)

0.6203752137234622

In [69]:
rmse = sqrt(mean_squared_error(y_test, y_pred))
rmse

1015.7799001975776

In [None]:
random_search.fit(X,Y)



In [None]:
Y_pred = random_search.predict(test_df)

In [None]:
sub['Item_Outlet_Sales'] = Y_pred
sub.to_csv('SampleSubmission.csv', index=False) #1155