%% [markdown]<br>
## Import necessary packages and libraries

%%

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.ensemble import RandomForestClassifier

%%<br>
Load the Housing data

In [2]:
df = pd.read_csv('./data/housing.csv')
print(df.head())

   Id  MSSubClass MSZoning  LotFrontage  LotArea Street LotShape LandContour  \
0   1          60       RL         65.0     8450   Pave      Reg         Lvl   
1   2          20       RL         80.0     9600   Pave      Reg         Lvl   
2   3          60       RL         68.0    11250   Pave      IR1         Lvl   
3   4          70       RL         60.0     9550   Pave      IR1         Lvl   
4   5          60       RL         84.0    14260   Pave      IR1         Lvl   

  Utilities LotConfig  ... EnclosedPorch 3SsnPorch ScreenPorch PoolArea  \
0    AllPub    Inside  ...             0         0           0        0   
1    AllPub       FR2  ...             0         0           0        0   
2    AllPub    Inside  ...             0         0           0        0   
3    AllPub    Corner  ...           272         0           0        0   
4    AllPub       FR2  ...             0         0           0        0   

  MiscVal MoSold  YrSold  SaleType  SaleCondition  SalePrice  
0    

%%<br>
Split the data into train and test

In [3]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(labels=['SalePrice'], axis=1), df['SalePrice']
                                    , random_state=0, test_size=0.3)
print(X_train.head())
print(y_train.head())

        Id  MSSubClass MSZoning  LotFrontage  LotArea Street LotShape  \
64      65          60       RL    70.049958     9375   Pave      Reg   
682    683         120       RL    70.049958     2887   Pave      Reg   
960    961          20       RL    50.000000     7207   Pave      IR1   
1384  1385          50       RL    60.000000     9060   Pave      Reg   
1100  1101          30       RL    60.000000     8400   Pave      Reg   

     LandContour Utilities LotConfig  ... OpenPorchSF EnclosedPorch 3SsnPorch  \
64           Lvl    AllPub    Inside  ...          36             0         0   
682          HLS    AllPub    Inside  ...           0             0         0   
960          Lvl    AllPub    Inside  ...           0             0         0   
1384         Lvl    AllPub    Inside  ...           0             0         0   
1100         Bnk    AllPub    Inside  ...           0             0         0   

     ScreenPorch PoolArea MiscVal  MoSold  YrSold  SaleType  SaleCondition

%% [markdown]<br>
## Forward Feature Selection

%%

In [4]:
sfs = SequentialFeatureSelector(RandomForestClassifier(), k_features=10, forward=True, 
        floating=False, scoring='accuracy', cv=2)
# fit the object to the training data
sfs = sfs.fit(X_train, y_train)

te-packages/sklearn/utils/validation.py", line 72, in inner_f
    return f(**kwargs)
  File "/Users/vijay/opt/anaconda3/envs/envml/lib/python3.8/site-packages/sklearn/utils/validation.py", line 795, in check_X_y
    X = check_array(X, accept_sparse=accept_sparse,
  File "/Users/vijay/opt/anaconda3/envs/envml/lib/python3.8/site-packages/sklearn/utils/validation.py", line 72, in inner_f
    return f(**kwargs)
  File "/Users/vijay/opt/anaconda3/envs/envml/lib/python3.8/site-packages/sklearn/utils/validation.py", line 598, in check_array
    array = np.asarray(array, order=order, dtype=dtype)
  File "/Users/vijay/opt/anaconda3/envs/envml/lib/python3.8/site-packages/numpy/core/_asarray.py", line 83, in asarray
    return array(a, dtype, copy=False, order=order)
ValueError: could not convert string to float: 'RL'

Traceback (most recent call last):
  File "/Users/vijay/opt/anaconda3/envs/envml/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
  

%%<br>
Print the selected features.

In [5]:
selected_features = X_train.columns[list(sfs.k_feature_idx_)]
print(selected_features)

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'LotShape', 'LandContour', 'Utilities', 'LotConfig'],
      dtype='object')


%%<br>
Print final prediction score

In [6]:
print(sfs.k_score_)

nan


%%<br>
Transform the newly selected features

In [7]:
X_train_sfs = sfs.transform(X_train)
X_test_sfs = sfs.transform(X_test)

%%

In [8]:
print(X_test_sfs)
print(X_test_sfs[0, :])
print(X_test_sfs[0])

[[530 20 'RL' ... 'Lvl' 'AllPub' 'CulDSac']
 [492 50 'RL' ... 'Lvl' 'AllPub' 'Inside']
 [460 50 'RL' ... 'Bnk' 'AllPub' 'Corner']
 ...
 [655 20 'RL' ... 'Lvl' 'AllPub' 'Inside']
 [1281 20 'RL' ... 'Lvl' 'AllPub' 'Inside']
 [899 20 'RL' ... 'Lvl' 'AllPub' 'Inside']]
[530 20 'RL' 70.04995836802665 32668 'Pave' 'IR1' 'Lvl' 'AllPub' 'CulDSac']
[530 20 'RL' 70.04995836802665 32668 'Pave' 'IR1' 'Lvl' 'AllPub' 'CulDSac']


%%