#  This is the first pass at a MLM - using LinearRegression - to explore
#  the datasets and get a baseline

In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import psycopg2
import time

In [2]:
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score

In [3]:
# Download raw data from postgres for stage 1 ETL

conn_string = 'postgres://whnpmxwsiccrtg:53c453893549d2b1e6a4ff92e626a2a08ebcaff66678e50d33e3742f66e3e4f4@ec2-52-4-171-132.compute-1.amazonaws.com/d2ajro4cjr10lb'

db = create_engine(conn_string)
conn = db.connect()

start_time = time.time()
clean_listing = pd.read_sql_query('select * from "listings_full_clean"',con=conn)
calendar = pd.read_sql_query('select * from "calendar_clean"',con=conn)
amenities = pd.read_sql_query('select * from "amenities_clean"',con=conn)
print("PostGres Download Duration: {} seconds".format(time.time() - start_time))


PostGres Download Duration: 1.6350276470184326 seconds


In [4]:
# View clean_listing database
clean_listing.head()

Unnamed: 0,id,host_is_superhost,host_listings_count,host_identity_verified,neighbourhood_cleansed,property_type,room_type,accommodates,bathrooms,bedrooms,...,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,cancellation_policy,reviews_per_month,days_host
0,6422,1,1,1,District 6,House,Private room,2,1.0,2,...,10.0,10.0,10.0,10.0,10.0,10.0,0,strict_14_with_grace_period,4.74,3576
1,20847,1,2,0,District 7,House,Private room,2,1.0,1,...,9.0,10.0,9.0,10.0,9.0,9.0,0,strict_14_with_grace_period,1.28,3265
2,25341,1,2,0,District 7,House,Private room,2,1.0,1,...,10.0,10.0,10.0,10.0,9.0,9.0,0,strict_14_with_grace_period,0.97,3265
3,25613,1,1,1,District 6,House,Private room,2,1.0,1,...,10.0,10.0,10.0,10.0,10.0,10.0,1,moderate,4.27,3227
4,37306,0,1,0,District 17,House,Private room,2,1.0,1,...,10.0,10.0,10.0,10.0,10.0,10.0,0,strict_14_with_grace_period,0.97,3115


In [5]:
# View data types of full_listings
clean_listing.dtypes

id                               int64
host_is_superhost                int64
host_listings_count              int64
host_identity_verified           int64
neighbourhood_cleansed          object
property_type                   object
room_type                       object
accommodates                     int64
bathrooms                      float64
bedrooms                         int64
beds                           float64
bed_type                        object
price                          float64
security_deposit               float64
cleaning_fee                   float64
guests_included                  int64
minimum_nights                   int64
maximum_nights                   int64
minimum_minimum_nights           int64
maximum_minimum_nights           int64
minimum_maximum_nights           int64
maximum_maximum_nights           int64
minimum_nights_avg_ntm         float64
maximum_nights_avg_ntm         float64
availability_30                  int64
availability_60          

In [7]:
# Drop duplicated variables.
listing = clean_listing.drop(columns = ['price'])

In [8]:
# View amenities dataframe
amenities.head()

Unnamed: 0,id,tv,internet,wifi,air_conditioning,kitchen,free_parking_on_premises,pets_live_on_this_property,dogs,cats,...,beachfront,ski-in/ski-out,beach_view,high-resolution_computer_monitor,standing_valet,pool_with_pool_hoist,sun_loungers,electric_profiling_bed,hammock,mobile_hoist
0,6422,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,20847,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,25341,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,25613,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,37306,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# View calendar dataframe
calendar.head(10)

Unnamed: 0,id,day,month,price
0,6422,weekday,1,40.0
1,6422,weekday,2,40.0
2,6422,weekday,3,40.0
3,6422,weekday,4,40.0
4,6422,weekday,5,40.0
5,6422,weekday,6,40.0
6,6422,weekday,7,40.0
7,6422,weekday,8,40.0
8,6422,weekday,9,40.0
9,6422,weekday,10,40.0


In [10]:
# Merge amenities table to full listings.
merge1 = listing.merge(amenities, how='left', on ='id')

In [11]:
# Merge calendar table to amenities and full listings.
merged = merge1.merge(calendar, how='left', on ='id')
merged.tail(20)

Unnamed: 0,id,host_is_superhost,host_listings_count,host_identity_verified,neighbourhood_cleansed,property_type,room_type,accommodates,bathrooms,bedrooms,...,high-resolution_computer_monitor,standing_valet,pool_with_pool_hoist,sun_loungers,electric_profiling_bed,hammock,mobile_hoist,day,month,price
143884,31632611,0,6,1,District 19,Apartment,Entire home/apt,10,2.0,2,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,weekday,5,264.0
143885,31632611,0,6,1,District 19,Apartment,Entire home/apt,10,2.0,2,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,weekday,6,264.0
143886,31632611,0,6,1,District 19,Apartment,Entire home/apt,10,2.0,2,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,weekday,7,264.0
143887,31632611,0,6,1,District 19,Apartment,Entire home/apt,10,2.0,2,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,weekday,8,264.0
143888,31632611,0,6,1,District 19,Apartment,Entire home/apt,10,2.0,2,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,weekday,9,264.0
143889,31632611,0,6,1,District 19,Apartment,Entire home/apt,10,2.0,2,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,weekday,10,264.0
143890,31632611,0,6,1,District 19,Apartment,Entire home/apt,10,2.0,2,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,weekday,11,264.0
143891,31632611,0,6,1,District 19,Apartment,Entire home/apt,10,2.0,2,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,weekday,12,264.0
143892,31632611,0,6,1,District 19,Apartment,Entire home/apt,10,2.0,2,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,weekend,1,307.75
143893,31632611,0,6,1,District 19,Apartment,Entire home/apt,10,2.0,2,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,weekend,2,249.0


In [12]:
# Merge tables and drop 'id' (unique identifier - not relevant)
merged = merged.drop(columns = 'id')

In [13]:
# Create a copy of the database to work with going forward.
merged2 = merged.copy()

In [14]:
# Create a list of the object type columns.
objects = merged2.dtypes[merged2.dtypes == 'object'].index.tolist()
objects

['neighbourhood_cleansed',
 'property_type',
 'room_type',
 'bed_type',
 'cancellation_policy',
 'day']

In [15]:
# Encode objects to transform objects type columns to numeric values.
le = LabelEncoder()
for obj in objects:
    merged2[obj] = le.fit_transform(merged2[obj])

In [16]:
# View the merged dataframe
merged2.head()

Unnamed: 0,host_is_superhost,host_listings_count,host_identity_verified,neighbourhood_cleansed,property_type,room_type,accommodates,bathrooms,bedrooms,beds,...,high-resolution_computer_monitor,standing_valet,pool_with_pool_hoist,sun_loungers,electric_profiling_bed,hammock,mobile_hoist,day,month,price
0,1,1,1,31,17,1,2,1.0,2,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,40.0
1,1,1,1,31,17,1,2,1.0,2,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2,40.0
2,1,1,1,31,17,1,2,1.0,2,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,3,40.0
3,1,1,1,31,17,1,2,1.0,2,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,4,40.0
4,1,1,1,31,17,1,2,1.0,2,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,5,40.0


In [17]:
pd.pandas.set_option('display.max_columns', None)

In [18]:
merged2.head()

Unnamed: 0,host_is_superhost,host_listings_count,host_identity_verified,neighbourhood_cleansed,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,security_deposit,cleaning_fee,guests_included,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,cancellation_policy,reviews_per_month,days_host,tv,internet,wifi,air_conditioning,kitchen,free_parking_on_premises,pets_live_on_this_property,dogs,cats,heating,family/kid_friendly,washer,dryer,smoke_detector,carbon_monoxide_detector,first_aid_kit,safety_card,fire_extinguisher,essentials,shampoo,lock_on_bedroom_door,hangers,hair_dryer,iron,laptop_friendly_workspace,bathtub,children’s_books_and_toys,hot_water,bed_linens,extra_pillows_and_blankets,ethernet_connection,microwave,coffee_maker,refrigerator,dishes_and_silverware,garden_or_backyard,luggage_dropoff_allowed,long_term_stays_allowed,host_greets_you,cable_tv,breakfast,free_street_parking,indoor_fireplace,wide_hallway_clearance,wide_doorway,well-lit_path_to_entrance,disabled_parking_spot,step-free_access,wide_clearance_to_bed,wide_entryway,dishwasher,single_level_home,accessible-height_toilet,full_kitchen,paid_parking_off_premises,elevator,24-hour_check-in,self_check-in,smart_lock,private_entrance,lockbox,hot_tub,keypad,babysitter_recommendations,pocket_wifi,cooking_basics,oven,stove,bbq_grill,wheelchair_accessible,suitable_for_events,fireplace_guards,patio_or_balcony,private_living_room,pool,pets_allowed,gym,outlet_covers,high_chair,pack_’n_play/travel_crib,room-darkening_shades,children’s_dinnerware,accessible-height_bed,other,smoking_allowed,wide_clearance_to_shower,_toilet,fixed_grab_bars_for_shower,buzzer/wireless_intercom,body_soap,bath_towel,toilet_paper,ceiling_fan,smart_tv,jetted_tub,terrace,balcony,convection_oven,netflix,memory_foam_mattress,en_suite_bathroom,bedroom_comforts,bathroom_essentials,doorman,lake_access,other_pets,flat_path_to_front_door,handheld_shower_head,private_bathroom,crib,waterfront,kitchenette,mini_fridge,fire_pit,outdoor_seating,walk-in_shower,building_staff,cleaning_before_checkout,firm_mattress,baby_bath,changing_table,hot_water_kettle,central_air_conditioning,breakfast_table,murphy_bed,outdoor_parking,ev_charger,window_guards,shower_chair,stair_gates,game_console,baby_monitor,roll-in_shower,fixed_grab_bars_for_toilet,paid_parking_on_premises,sound_system,gas_oven,formal_dining_area,beach_essentials,table_corner_guards,air_purifier,rain_shower,bathtub_with_bath_chair,day_bed,pillow-top_mattress,ground_floor_access,dvd_player,warming_drawer,printer,mudroom,amazon_echo,shared_pool,hbo_go,espresso_machine,heated_floors,alfresco_bathtub,steam_oven,double_oven,wine_cooler,soaking_tub,beachfront,ski-in/ski-out,beach_view,high-resolution_computer_monitor,standing_valet,pool_with_pool_hoist,sun_loungers,electric_profiling_bed,hammock,mobile_hoist,day,month,price
0,1,1,1,31,17,1,2,1.0,2,3.0,4,0.0,0.0,1,1,730,1,1,730,730,1.0,730.0,1,7,10,133,561,131,99.0,10.0,10.0,10.0,10.0,10.0,10.0,0,3,4.74,3576,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,40.0
1,1,1,1,31,17,1,2,1.0,2,3.0,4,0.0,0.0,1,1,730,1,1,730,730,1.0,730.0,1,7,10,133,561,131,99.0,10.0,10.0,10.0,10.0,10.0,10.0,0,3,4.74,3576,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2,40.0
2,1,1,1,31,17,1,2,1.0,2,3.0,4,0.0,0.0,1,1,730,1,1,730,730,1.0,730.0,1,7,10,133,561,131,99.0,10.0,10.0,10.0,10.0,10.0,10.0,0,3,4.74,3576,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,3,40.0
3,1,1,1,31,17,1,2,1.0,2,3.0,4,0.0,0.0,1,1,730,1,1,730,730,1.0,730.0,1,7,10,133,561,131,99.0,10.0,10.0,10.0,10.0,10.0,10.0,0,3,4.74,3576,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,4,40.0
4,1,1,1,31,17,1,2,1.0,2,3.0,4,0.0,0.0,1,1,730,1,1,730,730,1.0,730.0,1,7,10,133,561,131,99.0,10.0,10.0,10.0,10.0,10.0,10.0,0,3,4.74,3576,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,5,40.0


In [19]:
# Define the target variable - output value.
y=merged2['price']


In [20]:
# Define the features - input values.
X = merged2.drop(columns ='price')

X.shape

(143904, 211)

In [22]:
#  reshape the variables for sklearn format numpy array
X = X.values.reshape(-1, 211)

In [23]:
# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [24]:
# Scale the train and test feature data.
data_scaler = StandardScaler()
X_train_scaled = data_scaler.fit_transform(X_train)
X_test_scaled = data_scaler.fit_transform(X_test)


In [25]:
# instantiate the model

regr = linear_model.LinearRegression()


In [26]:
# Train the linear regression model on the train data.
regr.fit(X_train_scaled, y_train)

LinearRegression()

In [27]:
# Calculate the predicted prices for scaled_X using the model.
y_pred = regr.predict(X_test_scaled)
output = pd.DataFrame({"Prediction":y_pred,"Actual":y_test})
print(output)

         Prediction       Actual
61376     81.485387   160.000000
76315    375.212809   369.600000
122180   387.195816   244.500000
22884    149.601169   110.250000
101631  1569.087788  3673.750000
...             ...          ...
129731   484.845907   294.173913
130666   425.970932   403.650000
111350   296.077803   280.000000
30691    145.292155    64.000000
4084      51.328383   275.000000

[35976 rows x 2 columns]


In [28]:
# Calculate the accuracy score of the initial model.
from sklearn.metrics import r2_score


#regr.score(X, y, sample_weight=None)

model_r2 = r2_score(y_test,y_pred)
model_r2


0.5792199680977473

In [29]:
from sklearn.metrics import mean_squared_error
MSE = mean_squared_error(y_test, y_pred)
MSE

29243.425133766003

In [30]:
# Print y-intercepts and coefficients.
print('Intercept: \n', regr.intercept_)
print('Coefficients: \n', regr.coef_)

Intercept: 
 268.7165867443901
Coefficients: 
 [ 5.22504356e+00  1.35763397e+01  3.95111197e+00 -1.05875264e+01
  5.30830956e-01 -3.91494291e+00  6.57315883e+01  7.74196667e+01
 -2.59067635e+00  2.81143530e+01  9.17246741e-01  1.37370959e+01
  3.98563799e+01 -1.42964212e+01 -1.54285696e+01  6.19899153e+02
  3.17031543e+01  3.34273407e+00 -1.21176626e+02 -7.08485642e+02
 -3.05513974e+01  2.12440934e+02 -1.14070223e+01  3.77972886e+00
  1.20213084e+01 -2.21711704e+00 -2.43716872e-01  1.71019880e+00
  2.12768798e+01 -1.50331697e+01  2.77730615e+01 -1.38776606e+01
 -3.38797063e+01  4.86079061e+01 -4.69422903e+01 -6.80140534e+00
  2.69293571e-02 -6.46659127e+00  2.78800715e+00  1.04867531e+00
  7.31697732e+00 -4.38966194e+00  2.06540416e+00 -4.77893175e+00
 -9.52450518e+00  2.26740606e+00 -8.80388374e-02 -9.80409078e-01
 -1.00765569e+00 -3.95153321e+00  3.30944709e+01 -3.46036506e+01
  5.16579619e+00  1.46799985e+00 -2.14502254e+00  1.04273876e-01
 -3.21418577e+00 -9.54100278e+00  2.0168887

In [31]:
Coef = {}

In [32]:
# Add each coefficient and label to a dictionary
Coef = {}
for i in range(len(merged2.columns)-1):
    Coef[merged2.columns[i]] = regr.coef_[i]
    

In [33]:
# Show coefficients.
Coef

{'host_is_superhost': 5.225043555489012,
 'host_listings_count': 13.576339721990184,
 'host_identity_verified': 3.951111965385956,
 'neighbourhood_cleansed': -10.58752642972727,
 'property_type': 0.5308309563489031,
 'room_type': -3.914942909201321,
 'accommodates': 65.73158834758027,
 'bathrooms': 77.4196666740462,
 'bedrooms': -2.590676346861896,
 'beds': 28.11435302237344,
 'bed_type': 0.9172467412816823,
 'security_deposit': 13.737095922677053,
 'cleaning_fee': 39.8563798995649,
 'guests_included': -14.296421234370936,
 'minimum_nights': -15.428569608908425,
 'maximum_nights': 619.8991530380954,
 'minimum_minimum_nights': 31.703154294877816,
 'maximum_minimum_nights': 3.3427340662209732,
 'minimum_maximum_nights': -121.17662606751928,
 'maximum_maximum_nights': -708.4856419181607,
 'minimum_nights_avg_ntm': -30.55139741107511,
 'maximum_nights_avg_ntm': 212.44093359662048,
 'availability_30': -11.407022264609797,
 'availability_60': 3.7797288573764836,
 'availability_90': 12.021308