In [426]:
import numpy as np
import pandas as pd
pd.options.display.max_columns = 200
pd.options.display.max_rows = 80
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score, train_test_split, ShuffleSplit
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder 
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from collections import defaultdict
import re

In [463]:
# Read-in airbnb listing.csv file
df = pd.read_csv('../data/amsterdam_2021-02-08_listings.csv.gz', compression='gzip')

# Drop columns which contains long text e.g. URL, description
df.drop(columns =['id', 'host_id', 'last_scraped', 'scrape_id', 'name', 'host_name', 'host_about', 'listing_url', 'description', 'neighborhood_overview', 'picture_url', 'host_url',
                  'host_thumbnail_url', 'host_picture_url', 'host_verifications', 'calendar_last_scraped', 'latitude', \
                  'longitude'],\
                  inplace=True)

In [335]:
df.head()

Unnamed: 0,host_since,host_location,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_neighbourhood,host_listings_count,host_total_listings_count,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,2008-09-24,"Amsterdam, Noord-Holland, The Netherlands",,,100%,t,Indische Buurt,1.0,1.0,t,t,"Amsterdam, North Holland, Netherlands",Oostelijk Havengebied - Indische Buurt,,52.36575,4.94142,Private room in apartment,Private room,2,,1.5 shared baths,1.0,2.0,$59.00,3,1125,3.0,3.0,1125.0,1125.0,3.0,1125.0,,t,29,46,60,139,278,1,0,2009-03-30,2020-02-14,98.0,10.0,10.0,10.0,10.0,9.0,10.0,,t,1,0,1,0,1.92
1,2009-12-02,"Amsterdam, Noord-Holland, The Netherlands",,,100%,f,Grachtengordel,2.0,2.0,t,t,"Amsterdam, North Holland, Netherlands",Centrum-Oost,,52.36509,4.89354,Private room in townhouse,Private room,2,,1 private bath,1.0,1.0,$129.00,1,365,1.0,4.0,60.0,1125.0,3.8,413.8,,t,0,0,0,0,339,19,0,2010-03-02,2020-04-09,89.0,10.0,10.0,10.0,10.0,10.0,9.0,,t,2,0,2,0,2.54
2,2009-11-20,"New York, New York, United States",,,0%,t,Grachtengordel,2.0,2.0,t,f,,Centrum-West,,52.37297,4.88339,Entire apartment,Entire home/apt,3,,1 bath,1.0,1.0,$125.00,14,180,14.0,14.0,180.0,180.0,14.0,180.0,,t,6,36,66,251,5,0,0,2018-01-21,2020-02-09,100.0,10.0,10.0,10.0,10.0,10.0,10.0,,f,1,1,0,0,0.13
3,2010-03-23,"Amsterdam, Noord-Holland, The Netherlands",within an hour,100%,100%,t,Westelijke Eilanden,1.0,1.0,t,t,"Amsterdam, North Holland, Netherlands",Centrum-West,,52.38761,4.89188,Private room in houseboat,Private room,2,,1.5 baths,1.0,1.0,$125.00,2,730,2.0,2.0,1125.0,1125.0,2.0,1125.0,,t,27,55,79,136,219,6,0,2012-01-09,2020-07-25,99.0,10.0,10.0,10.0,10.0,10.0,10.0,,t,1,0,1,0,1.98
4,2010-05-13,"Amsterdam, Noord-Holland, The Netherlands",within an hour,100%,87%,t,Amsterdam Centrum,2.0,2.0,t,t,"Amsterdam, North Holland, Netherlands",Centrum-Oost,,52.3661,4.88953,Private room in apartment,Private room,2,,1 shared bath,1.0,1.0,$75.00,2,1825,2.0,2.0,1825.0,1825.0,2.0,1825.0,,t,30,53,78,338,336,13,0,2010-08-22,2020-09-20,97.0,10.0,10.0,10.0,10.0,10.0,10.0,,f,2,0,2,0,2.63


In [464]:
# Keep columns with at least 80% in non-null values
thresh = len(df) * 0.80
df.dropna(thresh = thresh, axis = 1, inplace = True)

In [465]:
# Clean bathroom_text to remove text part and keep numeric part
df['bathrooms_cnt'] = pd.to_numeric(df['bathrooms_text'].str.split(" ", n = 1, expand = True)[0], errors = 'coerce')
df.drop(columns = 'bathrooms_text', inplace = True) 

# Convert to datetime
df['host_since'] = pd.to_datetime(df['host_since'])

# host_total_listing_count variable is present containing same value
df.drop(columns = 'host_listings_count', inplace = True) 

In [466]:
# Create dummy column for each item in amenities
col_value = 'amenities'
for i in df.index:          
    splitted_amenities = re.sub('[^a-zA-Z0-9,\n\.]', '', df.at[i, col_value]).split(',')
    
    for amenity in splitted_amenities:        
        amenity = (amenity.strip())        
        df.at[i, f'amenity_{amenity.lower()}'] = 1

df.fillna(0, inplace = True)
df.drop(columns = 'amenities', inplace = True) 

In [467]:
# Convert Price to numeric to remove $ sign and remove comma, if any
df['price'] = [float(value[1:-1].replace(',', '')) for value in df.price]

# Label encode the categorical columns
for i in df.select_dtypes(exclude= [np.number]).columns:  
    df.loc[:, f'{i}_encoded'] = pd.factorize(df[i])[0].reshape(-1, 1)
    df.drop(columns = i, inplace = True)

In [468]:
# Drop rows with any null values
thresh = len(df) * 1
df.dropna(inplace = True)

In [469]:
# Create Feature and Dependent variable data frame
x = df.drop(columns = ['price'])
y = df['price']

In [404]:
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state = 0)
#column_trans = make_column_transformer((OrdinalEncoder(), df.select_dtypes(exclude= [np.number]).columns),
#                                       remainder = 'passthrough')
#model = make_pipeline(column_trans, LinearRegression())

In [405]:
model = LinearRegression()

In [406]:
model.fit(x_train, y_train)
print('Linear regression score %f' %model.score(x_test, y_test))

Linear regression score 0.183878


In [470]:
rf = RandomForestRegressor(min_samples_leaf = 5, max_features = 'sqrt')
#rf.fit(x_train, y_train)
#print('R^2 Training Score: {:.2f} \nR^2 Validation Score: {:.2f}'.format(rf.score(x_train, y_train), rf.score(x_test, y_test)))

In [472]:
scores = defaultdict(list)
rs = ShuffleSplit(n_splits = 10, random_state = 100, test_size = 0.3)

for train_idx, test_idx in rs.split(y):
    x_train, x_test = x.iloc[train_idx], x.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
    rf.fit(x_train, y_train)
    accuracy = r2_score(y_test, rf.predict(x_test))
    
    for i in range(x.shape[1]):
        x_t = x_test.copy()
        tmp = x_t.iloc[:,i]
        np.random.shuffle(tmp)
        shuff_acc = r2_score(y_test, rf.predict(tmp))
        scores[names[i]].append((accuracy-shuff)/accuracy)

print(sorted([(round(np.mean(score), 4), feature) for feature, score in scores.items()]), reverse = True)
    

KeyError: 5487

In [419]:
features = x.columns
print(sorted(zip(map(lambda x: round(x, 4), rf.feature_importances_), features), reverse = True))

[(0.0774, 'accommodates'), (0.0611, 'bedrooms'), (0.0583, 'beds'), (0.0479, 'bathrooms_cnt'), (0.039, 'host_since_encoded'), (0.0366, 'last_review_encoded'), (0.0328, 'neighbourhood_cleansed_encoded'), (0.0251, 'maximum_maximum_nights'), (0.0248, 'reviews_per_month'), (0.0247, 'number_of_reviews'), (0.0216, 'property_type_encoded'), (0.0203, 'review_scores_rating'), (0.0196, 'room_type_encoded'), (0.0195, 'maximum_nights'), (0.019, 'first_review_encoded'), (0.0184, 'calculated_host_listings_count_entire_homes'), (0.0181, 'minimum_nights_avg_ntm'), (0.0174, 'maximum_minimum_nights'), (0.016, 'availability_365'), (0.0158, 'amenity_dryer'), (0.0157, 'maximum_nights_avg_ntm'), (0.0147, 'minimum_nights'), (0.0146, 'availability_90'), (0.0139, 'minimum_maximum_nights'), (0.0127, 'minimum_minimum_nights'), (0.0127, 'availability_60'), (0.0127, 'amenity_bathtub'), (0.011, 'review_scores_value'), (0.011, 'review_scores_checkin'), (0.0106, 'calculated_host_listings_count_private_rooms'), (0.0105

In [18]:
cross_val_score(pipe, x, y, cv= 10, scoring = )

ColumnTransformer(n_jobs=None, remainder='passthrough', sparse_threshold=0.3,
                  transformer_weights=None,
                  transformers=[('ordinalencoder',
                                 OrdinalEncoder(categories='auto',
                                                dtype=<class 'numpy.float64'>),
                                 Index(['last_scraped', 'host_since', 'host_location', 'host_response_time',
       'host_response_rate', 'host_acceptance_rate', 'host_is_superhost',
       'host_neighbourhood', 'host_has_profile_pic', 'host_identity_verified',
       'neighbourhood', 'neighbourhood_cleansed', 'property_type', 'room_type',
       'bathrooms_text', 'has_availability', 'calendar_last_scraped',
       'first_review', 'last_review', 'instant_bookable'],
      dtype='object'))],
                  verbose=False)

In [410]:
df.head()

Unnamed: 0,host_total_listings_count,accommodates,bedrooms,beds,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month,bathrooms_cnt,amenity_hangers,amenity_coffeemaker,amenity_paidparkingonpremises,amenity_longtermstaysallowed,amenity_firstaidkit,amenity_bedlinens,amenity_lockonbedroomdoor,amenity_privateentrance,amenity_carbonmonoxidealarm,amenity_dedicatedworkspace,amenity_hostgreetsyou,amenity_singlelevelhome,amenity_extrapillowsandblankets,amenity_hotwater,amenity_paidparkingoffpremises,amenity_heating,amenity_gardenorbackyard,amenity_hairdryer,amenity_essentials,amenity_smokealarm,amenity_washer,amenity_refrigerator,amenity_iron,amenity_shampoo,amenity_ethernetconnection,amenity_wifi,amenity_fireextinguisher,amenity_freestreetparking,amenity_tv,amenity_elevator,amenity_cookingbasics,amenity_dishesandsilverware,amenity_oven,amenity_dishwasher,amenity_kitchen,amenity_cabletv,amenity_microwave,amenity_stove,amenity_indoorfireplace,amenity_dryer,amenity_patioorbalcony,amenity_luggagedropoffallowed,amenity_breakfast,amenity_lakeaccess,amenity_waterfront,amenity_smartlock,amenity_showergel,amenity_keypad,amenity_lockbox,amenity_soundsystem,amenity_crib,amenity_portablefans,amenity_nespressomachine,amenity_laundromatnearby,amenity_freezer,amenity_highchair,amenity_evcharger,amenity_mariestellamarisshampoo,amenity_babysitterrecommendations,amenity_roomdarkeningshades,amenity_washeru2013u00a0inbuilding,amenity_paidstreetparkingoffpremises,amenity_bathtub,amenity_wifiu2013250mbps,amenity_dryeru2013inbuilding,amenity_mariestellamarisconditioner,amenity_toaster,...,amenity_googlebluetoothsoundsystem,amenity_electroluxrefrigerator,amenity_ikeainductionstove,amenity_ahbodysoap,amenity_ahconditioner,amenity_limitedhousekeepingu2014,amenity_housebikes,amenity_onsitebar,amenity_linens,amenity_selfparkingu2014u20ac10day,amenity_gameroom,amenity_laundryservices,amenity_freewifi,amenity_bedsheetsandpillows,amenity_toiletries,amenity_complimentarycontinentalbreakfast,amenity_petsallowed,amenity_continentalbreakfastavailableu2014u20ac13perpersonperday,amenity_restaurant,amenity_valetparkingu2014u20ac35day,amenity_housebrandbodysoap,amenity_soundsystemwithbluetoothandaux,amenity_paidparkinggarageonpremisesu201360spaces,amenity_onsitebaru2014hotelbar,amenity_complimentarybreakfastbuffet,amenity_roomservice,amenity_minibar,amenity_complimentaryselfparking,amenity_concierge,amenity_bikerentalu2014u20ac12day,amenity_airportshuttle,amenity_selfparkingu2014u20ac20day,amenity_complimentarycookedtoorderbreakfast,amenity_slippers,amenity_selfparkingu2014u20ac48day,amenity_bottledwater,amenity_allinclusive,amenity_50tv,amenity_loccitanetoiletries,amenity_selfparkingu2014u20ac35day,amenity_complimentarybreakfast,amenity_fitnesscenter,amenity_zenologytoiletries,amenity_ueboomspeakerbluetoothsoundsystem,amenity_paidparkinggarageonpremisesu201310spaces,amenity_wifiu2013200mbps,amenity_clothingstoragedresserandwardrobe,amenity_tellmewhatyoupreferconditioner,amenity_windowacunit,amenity_tellmewhatyoupreferbodysoap,amenity_siemensstainlesssteeloven,amenity_borainductionstove,amenity_gaggenaustainlesssteeloven,amenity_siemensrefrigerator,amenity_savoirfairecosmeticsbodysoap,amenity_hbomax,amenity_sonysoundsystemwithbluetoothandaux,amenity_siemensstainlesssteelelectricstove,amenity_paidparkinggarageonpremisesu20131000spaces,amenity_siemensoven,amenity_siemensinductionstove,amenity_sonossoundsystemwithbluetoothandaux,amenity_stainlesssteelinductionstove,amenity_bangenolufsenbluetoothsoundsystem,amenity_marshallsoundsystemwithbluetoothandaux,amenity_breakfastbuffetavailableu2014u20ac15perpersonperday,amenity_bosesoundsystemwithbluetoothandaux,amenity_40hdtvwithstandardcable,amenity_smeggasstove,amenity_guhlshampoo,amenity_onsiterestaurantu2014canteenmopen247,amenity_housekeeping,amenity_continentalbreakfastavailableu2014u20ac10perpersonperday,amenity_paiddryeru2013inunit,amenity_paidparkinglotoffpremises,amenity_turndownservice,amenity_bathrobes,amenity_breakfastbuffetavailableforafee,amenity_bluetoothspeaker,amenity_onsitebaru2014barclaes,amenity_onsiterestaurantu2014momo,amenity_dedicatedworkspacemonitor,amenity_officechair,amenity_clothingstoragewardrobeandcloset,amenity_combimicrowaveovenstainlesssteeloven,amenity_43hdtv,amenity_bidet,amenity_samsungrefrigerator,host_since_encoded,host_location_encoded,host_is_superhost_encoded,host_has_profile_pic_encoded,host_identity_verified_encoded,neighbourhood_cleansed_encoded,property_type_encoded,room_type_encoded,has_availability_encoded,first_review_encoded,last_review_encoded,instant_bookable_encoded
0,1.0,2,1.0,2.0,59.0,3,1125,3.0,3.0,1125.0,1125.0,3.0,1125.0,29,46,60,139,278,1,0,98.0,10.0,10.0,10.0,10.0,9.0,10.0,1,0,1,0,1.92,1.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0
1,2.0,2,1.0,1.0,129.0,1,365,1.0,4.0,60.0,1125.0,3.8,413.8,0,0,0,0,339,19,0,89.0,10.0,10.0,10.0,10.0,10.0,9.0,2,0,2,0,2.54,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,1,0,0,1,1,0,0,1,1,0
2,2.0,3,1.0,1.0,125.0,14,180,14.0,14.0,180.0,180.0,14.0,180.0,6,36,66,251,5,0,0,100.0,10.0,10.0,10.0,10.0,10.0,10.0,1,1,0,0,0.13,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,1,0,0,1,2,2,1,0,2,2,1
3,1.0,2,1.0,1.0,125.0,2,730,2.0,2.0,1125.0,1125.0,2.0,1125.0,27,55,79,136,219,6,0,99.0,10.0,10.0,10.0,10.0,10.0,10.0,1,0,1,0,1.98,1.5,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,0,0,0,0,2,3,0,0,3,3,0
4,2.0,2,1.0,1.0,75.0,2,1825,2.0,2.0,1825.0,1825.0,2.0,1825.0,30,53,78,338,336,13,0,97.0,10.0,10.0,10.0,10.0,10.0,10.0,2,0,2,0,2.63,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,0,0,0,0,1,0,0,0,4,4,1


In [473]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18291 entries, 0 to 18290
Columns: 445 entries, host_total_listings_count to instant_bookable_encoded
dtypes: float64(419), int64(26)
memory usage: 62.2 MB
