In [1]:
import datetime
import math
import re
import pandas as pd
import numpy as np
from pandas_profiling import ProfileReport
import matplotlib.pyplot as plt
from sklearn.utils import check_array
from sklearn.preprocessing import LabelBinarizer, OrdinalEncoder, KBinsDiscretizer, OneHotEncoder, LabelEncoder, FunctionTransformer, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV, train_test_split, cross_validate
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
import lightgbm as lgbm

#### Utility

In [74]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Returns MAPE (%) for prediction"""
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    y_true = y_true.reshape(-1, 1)
    y_pred = y_pred.reshape(-1, 1)
    return np.abs(np.mean(np.abs((y_true - y_pred) / y_true)))

In [3]:
def log_preprocessing(array: np.array):
    return array + 1

In [4]:
def inverse_log_preprocessing(array: np.array):
    return array - 1

In [5]:
def inverse_np_log(array: np.array):
    def inverse_log(x):
        return math.e ** x
    func = np.vectorize(inverse_log)
    result = func(array)
    return result

#### Utility constants

In [6]:
DATE_AVG = pd.to_datetime('2015-06-01')
RANDOM_STATE = 23

#### Load data

In [7]:
calendar = pd.read_csv("calendar.csv")
reviews = pd.read_csv("reviews.csv")
sample_submission = pd.read_csv("sample_submission.csv")
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

In [8]:
calendar

Unnamed: 0,listing_id,date,available
0,9554,2019-08-18,t
1,97446,2019-11-04,f
2,97446,2019-11-03,f
3,97446,2019-11-02,f
4,97446,2019-11-01,f
...,...,...,...
27307470,27182471,2018-11-09,f
27307471,27182471,2018-11-08,f
27307472,27182471,2018-11-07,f
27307473,27182471,2018-11-06,f


In [9]:
reviews

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,9554,1184025,2012-04-26,1809049,Hana,"I stayed in London for a month to study, exp..."
1,9554,1206322,2012-04-30,2237488,Rishi,My bnb request was very last minute and i was ...
2,9554,1258541,2012-05-10,2150467,Panee,First time as airbnb!First time to London! I c...
3,9554,1405284,2012-06-03,1864672,Simone Cristina,A wonderful experience! The house is very well...
4,9554,1475969,2012-06-13,2438453,Sondra,We are so grateful that we trusted our intuiti...
...,...,...,...,...,...,...
1137319,29735949,344807582,2018-11-04,27552372,Declan,"Clean, spacious, stylish apartment close to ev..."
1137320,29736900,344387254,2018-11-03,109537206,Shazia,spotless clean flat with amazing view. the hos...
1137321,29756033,344498174,2018-11-03,26000990,Andrea,The host canceled this reservation 55 days bef...
1137322,29775194,345574439,2018-11-05,34546792,Robert,The host canceled this reservation 45 days bef...


In [10]:
train_data

Unnamed: 0,id,name,summary,space,description,experiences_offered,neighborhood_overview,notes,transit,access,...,square_feet,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,price
0,127860,Double bedroom in cottage Twickenham (sleeps 1-2),"One gorgeous, light-filled double bedroom (sle...","A beautiful, light-filled double bedroom is ou...","One gorgeous, light-filled double bedroom (sle...",none,"Very, very safe area, great transport links an...",We have a super-gorgeous cat,Twickenham Rugby Stadium is 15 mins walk Water...,"You'll have full use of the cottage, two recep...",...,,300.0,10.0,1,10.0,2,strict_14_with_grace_period,f,f,1000.0
1,325809,Big House for Olympics sleeps 6 to8,,"A beautiful, modern, art-filled and clean 4 st...","A beautiful, modern, art-filled and clean 4 st...",none,,,,,...,1400.0,771.0,,1,0.0,14,strict_14_with_grace_period,f,f,771.0
2,429045,The Old Coach House (Olympics),,"Fabulous, recently refurbished original Victor...","Fabulous, recently refurbished original Victor...",none,,,,,...,1800.0,600.0,,1,0.0,14,strict_14_with_grace_period,f,f,1500.0
3,473637,Brand New contemporary mews house,,Stunning contemporary mews available for let o...,Stunning contemporary mews available for let o...,none,,,,,...,2100.0,300.0,100.0,7,100.0,14,strict_14_with_grace_period,f,f,2000.0
4,533943,LUXURY APT. NEAR BUCKINGHAM PALACE,“The area around Westminster Cathedral has lon...,"This stunning, spacious, 4 bedroom, top floor ...",“The area around Westminster Cathedral has lon...,family,The proximity of all of London's main attracti...,,"Only 5 minutes walk to Victoria Station, the m...",,...,2220.0,2505.0,180.0,1,0.0,14,strict_14_with_grace_period,f,t,901.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51810,29797787,"Bright, modern flat in Wimbledon Village",Recently refurbished open plan flat set in a q...,"Light, bright first floor flat with a brand ne...",Recently refurbished open plan flat set in a q...,none,,,,,...,,,,1,0.0,2,flexible,f,f,68.0
51811,29797809,Luxury 2 Bdrm by The Shard Panoramic city views,Luxury 2 Bdrm by Shard with amazing panoramic ...,A modern luxury 2 bedroom apartment by The Sha...,Luxury 2 Bdrm by Shard with amazing panoramic ...,none,,,,Sole use of the entire apartment,...,,3856.0,75.0,2,10.0,3,strict_14_with_grace_period,f,f,349.0
51812,29797854,"Spare bed, close to london",,,,none,,,,,...,,,,1,0.0,1,flexible,f,f,100.0
51813,29797899,Greenlane Guest House,"Property close by Heathrow Airport, A Large do...","Large open plan living area, with a spacious s...","Property close by Heathrow Airport, A Large do...",none,"A lot of greenery around this property, great ...","Full internet service provided, Netflix and WiFi","Best way to travel is via a bus, easy to hop o...",Private area for guests- not shared,...,,,,1,0.0,1,flexible,f,f,69.0


In [11]:
sample_submission

Unnamed: 0,id,price
0,9554,0
1,11076,0
2,13913,0
3,17402,0
4,24328,0
...,...,...
22995,13559787,0
22996,13561162,0
22997,13561394,0
22998,13561787,0


In [12]:
test_data

Unnamed: 0,id,name,summary,space,description,experiences_offered,neighborhood_overview,notes,transit,access,...,amenities,square_feet,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification
0,9554,"Cozy, 3 minutes to Piccadilly Line",PLEASE CONTACT ME BEFORE BOOKING Homely apartm...,"Hello people, This is a bright, comfortable ro...",PLEASE CONTACT ME BEFORE BOOKING Homely apartm...,none,Details to follow..,,details to follow when i get a chance..,"Kitchen, small dining / smoking room (if you s...",...,"{TV,Internet,Wifi,Kitchen,""Smoking allowed"",Br...",,,7.0,1,15.0,1,strict_14_with_grace_period,t,f
1,11076,The Sanctuary,The room has a double bed and a single foldawa...,This Listing is for The Sanctury The accommoda...,The room has a double bed and a single foldawa...,none,"Ealing Broadway, as short walk from our place ...",,extemely good transport links to central londo...,Huge family kitchen and good wifi,...,"{TV,""Cable TV"",Internet,Wifi,Breakfast,""Pets l...",,,,2,35.0,2,strict_14_with_grace_period,f,f
2,13913,Holiday London DB Room Let-on going,My bright double bedroom with a large window h...,"Hello Everyone, I'm offering my lovely double ...",My bright double bedroom with a large window h...,business,Finsbury Park is a friendly melting pot commun...,For art lovers I can give guest my Tate Member...,The flat only a 10 minute walk to Finsbury Par...,Guest will have access to the self catering ki...,...,"{TV,""Cable TV"",Wifi,Kitchen,""Paid parking off ...",538.0,100.0,15.0,1,15.0,1,moderate,f,f
3,17402,Superb 3-Bed/2 Bath & Wifi: Trendy W1,"Open from June 2018 after a 3-year break, we a...",Ready again from June 2018 for bookings after ...,"Open from June 2018 after a 3-year break, we a...",none,"Location, location, location! You won't find b...",This property has new flooring throughout. Gue...,You can walk to tourist London or take numerou...,Full use of whole independent apartment,...,"{TV,Wifi,Kitchen,""Paid parking off premises"",E...",,350.0,65.0,4,10.0,3,strict_14_with_grace_period,f,f
4,24328,Battersea 2 bedroom house & parking,"Artist house, high ceiling bedrooms, private p...",- End of terrace two bedroom house close to So...,"Artist house, high ceiling bedrooms, private p...",family,"- Battersea is a quiet family area, easy acces...",- Please have a profile or tell us more about ...,"- 5 mins walk to Battersea Park, 15 mins walk ...",- there is a communal garden in our complex - ...,...,"{TV,""Cable TV"",Internet,Wifi,Kitchen,""Free par...",1001.0,250.0,70.0,2,15.0,90,strict_14_with_grace_period,t,t
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22995,13559787,"Spacious, Cool Clapton Apt in the Buzz of Hackney",Welcome to my oasis in the heart of Hackney! T...,,Welcome to my oasis in the heart of Hackney! T...,none,,,,,...,"{TV,Wifi,""Air conditioning"",Kitchen,Breakfast,...",,500.0,40.0,1,0.0,3,flexible,f,f
22996,13561162,"Nice Apartment; Great Portland St, Regents Pk,...",My Newly Styled Apartment is In a great Locati...,- Located At The Corner Of 'Robert Street & Al...,My Newly Styled Apartment is In a great Locati...,none,Euston Great Portland Street Marylebone Camden...,Arrival Times Must Be Organised With Us. Late...,,Private Apartment,...,"{TV,""Cable TV"",Internet,Wifi,Kitchen,""Buzzer/w...",,99.0,0.0,2,0.0,1,strict_14_with_grace_period,f,f
22997,13561394,Beautiful 3 bed house in Battersea,Our house is close to Battersea Park (10 min w...,,Our house is close to Battersea Park (10 min w...,none,,,,,...,"{TV,Wifi,Kitchen,""Free parking on premises"",""I...",,75.0,35.0,1,0.0,4,flexible,f,f
22998,13561787,A charming family house close to tube station.,"Sole use of house with parking, just over 5 mi...",,"Sole use of house with parking, just over 5 mi...",none,,,,,...,"{TV,""Cable TV"",Internet,Wifi,Kitchen,""Free par...",,250.0,40.0,1,0.0,2,strict_14_with_grace_period,f,f


## EDA

In [21]:
profile = ProfileReport(train_data)
profile.to_file(output_file="train_data_profile_report.html")

HBox(children=(FloatProgress(value=0.0, description='Summarize dataset', max=57.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Generate report structure', max=1.0, style=ProgressStyle(…




HBox(children=(FloatProgress(value=0.0, description='Render HTML', max=1.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Export report to file', max=1.0, style=ProgressStyle(desc…




In [61]:
train_data['price'].max()

13700.0

In [13]:
log_scale_transformer = Pipeline(steps=[
    ('log_preprocessing', FunctionTransformer(log_preprocessing, validate=False)),
    ('log_transorm', FunctionTransformer(np.log, validate=False)),
    ('scaler', StandardScaler())
])
temp_X = np.asarray(train_data['price']).reshape(-1, 1)
temp_X = log_scale_transformer.fit_transform(temp_X)

Look at profile report

## Preprocessing data

In [14]:
used_features = ['accommodates', 'bathrooms', 'bedrooms', 'beds', 
                 'square_feet', 'security_deposit', 'cleaning_fee',
                 'guests_included', 'extra_people', 'minimum_nights', 'price']
used_features_test = ['accommodates', 'bathrooms', 'bedrooms', 'beds', 
                 'square_feet', 'security_deposit', 'cleaning_fee',
                 'guests_included', 'extra_people', 'minimum_nights']

In [27]:
train_X = train_data.copy()[used_features]

In [54]:
test_X = test_data.copy()[used_features_test]
test_X

Unnamed: 0,accommodates,bathrooms,bedrooms,beds,square_feet,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights
0,2,,1.0,1.0,,,7.0,1,15.0,1
1,2,,1.0,1.0,,,,2,35.0,2
2,2,1.0,1.0,1.0,538.0,100.0,15.0,1,15.0,1
3,6,2.0,3.0,3.0,,350.0,65.0,4,10.0,3
4,4,1.5,2.0,2.0,1001.0,250.0,70.0,2,15.0,90
...,...,...,...,...,...,...,...,...,...,...
22995,2,1.0,1.0,1.0,,500.0,40.0,1,0.0,3
22996,3,1.0,1.0,2.0,,99.0,0.0,2,0.0,1
22997,6,1.5,3.0,3.0,,75.0,35.0,1,0.0,4
22998,5,1.0,3.0,3.0,,250.0,40.0,1,0.0,2


In [28]:
train_X

Unnamed: 0,accommodates,bathrooms,bedrooms,beds,square_feet,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,price
0,2,1.0,1.0,1.0,,300.0,10.0,1,10.0,2,1000.0
1,8,2.5,3.0,4.0,1400.0,771.0,,1,0.0,14,771.0
2,6,3.0,3.0,3.0,1800.0,600.0,,1,0.0,14,1500.0
3,9,4.0,4.0,6.0,2100.0,300.0,100.0,7,100.0,14,2000.0
4,8,3.0,4.0,6.0,2220.0,2505.0,180.0,1,0.0,14,901.0
...,...,...,...,...,...,...,...,...,...,...,...
51810,2,1.0,2.0,1.0,,,,1,0.0,2,68.0
51811,6,2.0,2.0,2.0,,3856.0,75.0,2,10.0,3,349.0
51812,1,1.5,1.0,1.0,,,,1,0.0,1,100.0
51813,2,1.0,1.0,4.0,,,,1,0.0,1,69.0


In [18]:
used_features_new = ['accommodates', 'bathrooms', 'bedrooms', 'beds', 
                 'square_feet', 'security_deposit', 'cleaning_fee',
                 'guests_included', 'extra_people', 'minimum_nights']

In [19]:
numerical_features = ['accommodates', 'bathrooms', 'bedrooms', 'beds', 
                 'square_feet', 'security_deposit', 'cleaning_fee',
                 'guests_included', 'extra_people', 'minimum_nights']

### Missing values

In [29]:
numerical_imputer = SimpleImputer(strategy='median')

In [30]:
train_X_price = np.asarray(train_X['price']).reshape(-1, 1)

In [31]:
train_X.drop(['price'], axis=1, inplace=True)

In [32]:
missing_preprocessor = ColumnTransformer(transformers=[
    ('numerical', numerical_imputer, numerical_features)],
     remainder='drop', n_jobs=-1)

In [33]:
train_X = missing_preprocessor.fit_transform(train_X)
train_X

array([[ 2. ,  1. ,  1. , ...,  1. , 10. ,  2. ],
       [ 8. ,  2.5,  3. , ...,  1. ,  0. , 14. ],
       [ 6. ,  3. ,  3. , ...,  1. ,  0. , 14. ],
       ...,
       [ 1. ,  1.5,  1. , ...,  1. ,  0. ,  1. ],
       [ 2. ,  1. ,  1. , ...,  1. ,  0. ,  1. ],
       [ 3. ,  2. ,  2. , ...,  1. ,  0. ,  2. ]])

In [34]:
train_X = pd.DataFrame(data=train_X, columns=used_features_new)
train_X

Unnamed: 0,accommodates,bathrooms,bedrooms,beds,square_feet,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights
0,2.0,1.0,1.0,1.0,753.0,300.0,10.0,1.0,10.0,2.0
1,8.0,2.5,3.0,4.0,1400.0,771.0,30.0,1.0,0.0,14.0
2,6.0,3.0,3.0,3.0,1800.0,600.0,30.0,1.0,0.0,14.0
3,9.0,4.0,4.0,6.0,2100.0,300.0,100.0,7.0,100.0,14.0
4,8.0,3.0,4.0,6.0,2220.0,2505.0,180.0,1.0,0.0,14.0
...,...,...,...,...,...,...,...,...,...,...
51810,2.0,1.0,2.0,1.0,753.0,100.0,30.0,1.0,0.0,2.0
51811,6.0,2.0,2.0,2.0,753.0,3856.0,75.0,2.0,10.0,3.0
51812,1.0,1.5,1.0,1.0,753.0,100.0,30.0,1.0,0.0,1.0
51813,2.0,1.0,1.0,4.0,753.0,100.0,30.0,1.0,0.0,1.0


#### Logarithmic function transformer

In [35]:
log_scale_transformer = Pipeline(steps=[
    ('log_preprocessing', FunctionTransformer(log_preprocessing, inverse_log_preprocessing, validate=False)),
    ('log_transorm', FunctionTransformer(np.log, inverse_np_log, validate=False)),
    ('scaler', StandardScaler())
])

In [36]:
log_scale_price_transformer = Pipeline(steps=[
    ('log_preprocessing', FunctionTransformer(log_preprocessing, inverse_log_preprocessing, validate=False)),
    ('log_transorm', FunctionTransformer(np.log, inverse_np_log, validate=False)),
    ('scaler', StandardScaler())
])

In [37]:
log_scale_preprocessor = ColumnTransformer(transformers=[
    ('log_scale', log_scale_transformer, numerical_features)],
     remainder='drop', n_jobs=-1)

In [38]:
train_X = log_scale_preprocessor.fit_transform(train_X)

In [39]:
train_X_price = log_scale_price_transformer.fit_transform(train_X_price)

In [45]:
train_y = pd.DataFrame(data=train_X_price, columns=['price'])

In [46]:
train_X = pd.DataFrame(data=train_X, columns=used_features_new)

## Model selection and cross-validation

In [47]:
scorer = make_scorer(mean_absolute_percentage_error, greater_is_better=False)

In [48]:
val_train_X, val_test_X, val_train_y, valid_test_y = train_test_split(train_X, train_y, test_size=0.2, random_state=RANDOM_STATE)

## Prediction

In [55]:
test_X = missing_preprocessor.transform(test_X)
test_X

array([[ 2. ,  1. ,  1. , ...,  1. , 15. ,  1. ],
       [ 2. ,  1. ,  1. , ...,  2. , 35. ,  2. ],
       [ 2. ,  1. ,  1. , ...,  1. , 15. ,  1. ],
       ...,
       [ 6. ,  1.5,  3. , ...,  1. ,  0. ,  4. ],
       [ 5. ,  1. ,  3. , ...,  1. ,  0. ,  2. ],
       [ 2. ,  1. ,  1. , ...,  1. ,  0. ,  1. ]])

In [56]:
test_X = pd.DataFrame(data=test_X, columns=used_features_new)
test_X = log_scale_preprocessor.transform(test_X)
test_X = pd.DataFrame(data=test_X, columns=used_features_new)
test_X

Unnamed: 0,accommodates,bathrooms,bedrooms,beds,square_feet,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights
0,-0.584410,-0.523183,-0.341796,-0.716333,0.007170,0.339422,-1.077076,-0.513550,1.223568,-0.844451
1,-0.584410,-0.523183,-0.341796,-0.716333,0.007170,0.339422,0.145329,0.681877,1.803300,-0.188691
2,-0.584410,-0.523183,-0.341796,-0.716333,-5.667409,0.339422,-0.451548,-0.513550,1.223568,-0.844451
3,1.414636,1.382496,1.640211,1.229452,0.007170,0.904714,0.827278,2.187936,0.955701,0.276579
4,0.620791,0.525588,0.817604,0.421878,4.814260,0.752537,0.893180,0.681877,1.223568,5.329952
...,...,...,...,...,...,...,...,...,...,...
22995,-0.584410,-0.523183,-0.341796,-0.716333,0.007170,1.066187,0.397639,-0.513550,-0.758546,0.276579
22996,0.094324,-0.523183,-0.341796,0.421878,0.007170,0.334907,-2.953660,0.681877,-0.758546,-0.844451
22997,1.414636,0.525588,1.640211,1.229452,0.007170,0.210366,0.280273,-0.513550,-0.758546,0.637470
22998,1.050946,-0.523183,1.640211,1.229452,0.007170,0.752537,0.397639,-0.513550,-0.758546,-0.188691


In [184]:
model = lgbm.sklearn.LGBMRegressor(learning_rate=0.95,
                                       n_estimators=25000,
                                       n_jobs=-1,
                                       subsample=0.5,
                                       colsample_bytree=0.65,
                                       reg_alpha=0.04,
                                       reg_lambda=0.28,
                                       objective="mape",
                                       random_state=42)

In [185]:
model.fit(train_X, train_y)

LGBMRegressor(colsample_bytree=0.65, learning_rate=0.95, n_estimators=25000,
              objective='mape', random_state=42, reg_alpha=0.04,
              reg_lambda=0.28, subsample=0.5)

In [186]:
pred = model.predict(train_X)

In [187]:
mean_absolute_percentage_error(train_y, pred)

1.1500731634236019

In [188]:
prediction = model.predict(test_X)

In [189]:
prediction = log_scale_price_transformer.inverse_transform(prediction)

In [190]:
submission = pd.DataFrame(data=zip(test_data['id'], prediction), columns=['id', 'price'])

In [191]:
submission

Unnamed: 0,id,price
0,9554,47.754581
1,11076,59.680404
2,13913,49.661215
3,17402,124.391300
4,24328,137.827484
...,...,...
22995,13559787,103.810317
22996,13561162,209.215335
22997,13561394,79.292104
22998,13561787,153.164978


In [296]:
submission

Unnamed: 0,id,price
0,9554,40.277229
1,11076,60.080887
2,13913,45.715095
3,17402,146.449236
4,24328,97.572770
...,...,...
22995,13559787,106.986834
22996,13561162,111.382336
22997,13561394,106.904207
22998,13561787,121.487876


In [192]:
submission.to_csv("submission2.csv", index=False)

## Experiments

#### Extract amenities and make them work

In [None]:
features = set()
amenities = pd.concat([train_data['amenities'], test_data['amenities']])
for i in range(len(amenities)):
    amenity_list = amenities.iloc[i].strip('{}').split(',')
    for feature in amenity_list:
        features.add(feature.strip('""'))

In [None]:
unnecesary_features = ['', 'Accessible-height bed', 'Accessible-height toilet',
                       'Alfresco bathtub', 'Amazon Echo', 'Baby bath', 'Baby monitor', 'Babysitter recommendations',
                       'Bathtub with bath chair', 'Beach essentials', 'Breakfast table', 'Cat(s)', 'Ceiling hoist',
                       'Changing table', 'Children’s books and toys', 'Children’s dinnerware', 'Cooking basics',
                       'Crib', 'DVD player', 'Day bed', 'Dog(s)', 'Double oven', 'Electric profiling bed', 'En suite bathroom',
                       'Espresso machine', 'Exercise equipment', 'Extra pillows and blankets', 'Fax machine',
                         'Fire extinguisher', 'Fire pit', 'Fireplace guards', 'Firm mattress', 'Fixed grab bars for shower',
                         'Fixed grab bars for toilet', 'Flat path to front door', 'Formal dining area', 'Free parking on premises',
                         'Free street parking', 'Gym', 'HBO GO', 'Hammock', 'Handheld shower head', 'Hangers', 'Heat lamps', 'Heated towel rack', 'High chair',
                         'High-resolution computer monitor', 'Host greets you', 'Jetted tub', 'Keypad', 'Kitchenette', 'Memory foam mattress', 'Mobile hoist',
                         'Mountain view', 'Mudroom', 'Murphy bed', 'Netflix', 'Other', 'Other pet(s)', 'Outdoor kitchen',
                         'Outdoor parking', 'Outdoor seating', 'Outlet covers', 'Oven', 'Pack ’n Play/travel crib', 'Paid parking off premises',
                         'Paid parking on premises', 'Patio or balcony', 'Pocket wifi', 'Pool with pool hoist', 'Printer', 'Private pool',
                         'Projector and screen', 'Rain shower', 'Shared gym', 'Shared pool', 'Shower chair', 'Single level home',
                         'Ski-in/Ski-out', 'Smart TV', 'Smart lock', 'Soaking tub', 'Window guards', 'Sound system', 'Stair gates',
                         'Stand alone steam shower', 'Standing valet', 'Steam oven', 'Step-free access', 'Stove', 'Tennis court', 'Touchless faucets',
                         'Walk-in shower', 'Warming drawer', 'Wide clearance to bed', 'Wide clearance to shower', 'Wide doorway',
                         'Wide entryway', 'Wide hallway clearance', 'translation missing: en.hosting_amenity_49',
                         'translation missing: en.hosting_amenity_50', 'Well-lit path to entrance',
                         'Wheelchair accessible', ' toilet', 'Air purifier', 'BBQ grill', 'Bathtub', 'Beach view', 'Beachfront',
                         'Bed linens', 'Bedroom comforts', 'Bidet', 'Body soap', 'Building staff', 'Buzzer/wireless intercom',
                         'Cable TV', 'Carbon monoxide detector', 'Ceiling fan', 'Coffee maker', 'Convection oven', 'Disabled parking spot',
                         'Dishes and silverware', 'Doorman', 'Dryer', 'EV charger', 'Essentials',
                         'Ethernet connection', 'Game console', 'Garden or backyard', 'Gas oven', 'Ground floor access',
                         'Hair dryer', 'Hot tub', 'Toilet paper', 'Washer', 'Wine cooler', 'Sun loungers', 'TV', 'Table corner guards',
                         'Terrace', 'Shampoo', 'Pool cover', 'Private bathroom', 'Private entrance', 'Private hot tub',
                         'Private living room', 'Mini fridge', 'Pets allowed', 'Pets live on this property',
                         'Pillow-top mattress', 'Lockbox', 'Long term stays allowed', 'Luggage dropoff allowed', 'Bath towel',
                         'Bathroom essentials', 'Dining table', 'Dishwasher', 'Elevator', 'Hot water', 'Hot water kettle', 'Indoor fireplace', 'Laptop friendly workspace',
                         'Lock on bedroom door', 'Roll-in shower', 'Room-darkening shades', 'Safety card',
                         'Self check-in', 'Waterfront',]

In [None]:
for un_feature in unnecesary_features:
    features.remove(un_feature)

In [None]:
features

In [None]:
train_data['neighbourhood_cleansed'].value_counts()